ac5113 committed on
Commit
99a05f0
1 Parent(s): 7c01a50

added files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LICENSE +57 -0
  2. README.md +145 -12
  3. app.py +7 -0
  4. assets/overview.png +0 -0
  5. assets/teaser.png +0 -0
  6. common/constants.py +35 -0
  7. common/renderer_pyrd.py +112 -0
  8. configs/cfg_test.yml +28 -0
  9. configs/cfg_train.yml +29 -0
  10. data/base_dataset.py +164 -0
  11. data/mixed_dataset.py +42 -0
  12. data/preprocess/behave-30fps-error_frames.json +511 -0
  13. data/preprocess/behave.py +0 -0
  14. data/preprocess/behave_test/behave_simple_test.npz.py +37 -0
  15. data/preprocess/behave_test/split.json +327 -0
  16. data/preprocess/hot_dca.py +220 -0
  17. data/preprocess/hot_noprox.py +241 -0
  18. data/preprocess/hot_prox.py +217 -0
  19. data/preprocess/prepare_damon_behave_split.py +69 -0
  20. data/preprocess/rich_smplx.py +222 -0
  21. data/preprocess/rich_smplx_agniv.py +578 -0
  22. data/preprocess/yoga-82_test/yoga82_simple_test.npz.py +34 -0
  23. example_images/213.jpg +0 -0
  24. example_images/pexels-photo-15732209.jpeg +0 -0
  25. example_images/pexels-photo-207569.webp +0 -0
  26. example_images/pexels-photo-3622517.webp +0 -0
  27. fetch_data.sh +18 -0
  28. hot_analysis/.ipynb_checkpoints/hico_analysis-checkpoint.ipynb +307 -0
  29. hot_analysis/.ipynb_checkpoints/vcoco_analysis-checkpoint.ipynb +276 -0
  30. hot_analysis/agniv_pose_filter/hico.npy +3 -0
  31. hot_analysis/agniv_pose_filter/hot.npy +3 -0
  32. hot_analysis/agniv_pose_filter/hot_dict.pkl +3 -0
  33. hot_analysis/agniv_pose_filter/pq_wnp.npy +3 -0
  34. hot_analysis/agniv_pose_filter/vcoco.npy +3 -0
  35. hot_analysis/count_objects_per_img.py +35 -0
  36. hot_analysis/create_combined_objectwise_plots.ipynb +291 -0
  37. hot_analysis/create_part_probability_mesh.py +86 -0
  38. hot_analysis/damon_qc_stats/compute_accuracy_iou_damon.py +59 -0
  39. hot_analysis/damon_qc_stats/compute_fleiss_kappa_damon.py +111 -0
  40. hot_analysis/damon_qc_stats/qa_accuracy_gt_contact_combined.npz +3 -0
  41. hot_analysis/damon_qc_stats/quality_assurance_accuracy.csv +0 -0
  42. hot_analysis/damon_qc_stats/quality_assurance_fleiss.csv +0 -0
  43. hot_analysis/damon_qc_stats/successful_qualifications_fleiss.csv +0 -0
  44. hot_analysis/filtered_data/v_1/hico/hico_imglist_all_140223.txt +0 -0
  45. hot_analysis/filtered_data/v_1/hico/image_per_object_category.png +0 -0
  46. hot_analysis/filtered_data/v_1/hico/imgnames_per_object_dict.json +0 -0
  47. hot_analysis/filtered_data/v_1/hico/imgnames_per_object_dict.txt +0 -0
  48. hot_analysis/filtered_data/v_1/hico/object_per_image_dict.json +0 -0
  49. hot_analysis/filtered_data/v_1/hico/object_per_image_dict.txt +0 -0
  50. hot_analysis/filtered_data/v_1/hico_imglist_all_140223.txt +0 -0
LICENSE ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ License
2
+ Data & Software Copyright License for non-commercial scientific research purposes
3
+ Please read carefully the following terms and conditions and any accompanying documentation before you download and/or use the DECO data, models and software, (the "Data & Software"), including DECO baseline models, 3D meshes, MANO/SMPL-X parameters, images, MoCap data, videos, software, and scripts. By downloading and/or using the Data & Software (including downloading, cloning, installing, and any other use of the corresponding GitHub repository), you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use the Data & Software. Any infringement of the terms of this agreement will automatically terminate your rights under this License
4
+
5
+ Ownership / Licensees
6
+ The Data & Software and the associated materials have been developed at the Max Planck Institute for Intelligent Systems (hereinafter "MPI").
7
+
8
+ Any copyright or patent right is owned by and proprietary material of the Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (hereinafter “MPG”; MPI and MPG hereinafter collectively “Max-Planck”)
9
+
10
+ hereinafter the “Licensor”.
11
+
12
+ License Grant
13
+ Licensor grants you (Licensee) personally a single-user, non-exclusive, non-transferable, free of charge right:
14
+
15
+ To install the Data & Software on computers owned, leased or otherwise controlled by you and/or your organization;
16
+ To use the Data & Software for the sole purpose of performing non-commercial scientific research, non-commercial education, or non-commercial artistic projects;
17
+ Any other use, in particular any use for commercial, pornographic, military, or surveillance, purposes is prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artifacts for commercial purposes. The Data & Software may not be used to create fake, libelous, misleading, or defamatory content of any kind excluding analyses in peer-reviewed scientific research. The Data & Software may not be reproduced, modified and/or made available in any form to any third party without Max-Planck’s prior written permission.
18
+
19
+ The Data & Software may not be used for pornographic purposes or to generate pornographic material whether commercial or not. This license also prohibits the use of the Data & Software to train methods/algorithms/neural networks/etc. for commercial, pornographic, military, surveillance, or defamatory use of any kind. By downloading the Data & Software, you agree not to reverse engineer it.
20
+
21
+ No Distribution
22
+ The Data & Software and the license herein granted shall not be copied, shared, distributed, re-sold, offered for re-sale, transferred or sub-licensed in whole or in part except that you may make one copy for archive purposes only.
23
+
24
+ Disclaimer of Representations and Warranties
25
+ You expressly acknowledge and agree that the Data & Software results from basic research, is provided “AS IS”, may contain errors, and that any use of the Data & Software is at your sole risk. LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE DATA & SOFTWARE, NEITHER EXPRESS NOR IMPLIED, AND THE ABSENCE OF ANY LEGAL OR ACTUAL DEFECTS, WHETHER DISCOVERABLE OR NOT. Specifically, and not to limit the foregoing, licensor makes no representations or warranties (i) regarding the merchantability or fitness for a particular purpose of the Data & Software, (ii) that the use of the Data & Software will not infringe any patents, copyrights or other intellectual property rights of a third party, and (iii) that the use of the Data & Software will not cause any damage of any kind to you or a third party.
26
+
27
+ Limitation of Liability
28
+ Because this Data & Software License Agreement qualifies as a donation, according to Section 521 of the German Civil Code (Bürgerliches Gesetzbuch – BGB) Licensor as a donor is liable for intent and gross negligence only. If the Licensor fraudulently conceals a legal or material defect, they are obliged to compensate the Licensee for the resulting damage.
29
+ Licensor shall be liable for loss of data only up to the amount of typical recovery costs which would have arisen had proper and regular data backup measures been taken. For the avoidance of doubt Licensor shall be liable in accordance with the German Product Liability Act in the event of product liability. The foregoing applies also to Licensor’s legal representatives or assistants in performance. Any further liability shall be excluded.
30
+ Patent claims generated through the usage of the Data & Software cannot be directed towards the copyright holders.
31
+ The Data & Software is provided in the state of development the licensor defines. If modified or extended by Licensee, the Licensor makes no claims about the fitness of the Data & Software and is not responsible for any problems such modifications cause.
32
+
33
+ No Maintenance Services
34
+ You understand and agree that Licensor is under no obligation to provide either maintenance services, update services, notices of latent defects, or corrections of defects with regard to the Data & Software. Licensor nevertheless reserves the right to update, modify, or discontinue the Data & Software at any time.
35
+
36
+ Defects of the Data & Software must be notified in writing to the Licensor with a comprehensible description of the error symptoms. The notification of the defect should enable the reproduction of the error. The Licensee is encouraged to communicate any use, results, modification, or publication.
37
+
38
+ Publications using the Data & Software
39
+ You acknowledge that the Data & Software is a valuable scientific resource and agree to appropriately reference the following paper in any publication making use of the Data & Software.
40
+
41
+ Subjects' Consent: All subjects gave informed written consent to share their data for research purposes. You further agree to delete data or change their use, in case a subject changes or withdraws their consent.
42
+
43
+ Citation:
44
+
45
+ @InProceedings{Tripathi_2023_ICCV,
46
+ author = {Tripathi, Shashank and Chatterjee, Agniv and Passy, Jean-Claude and Yi, Hongwei and Tzionas, Dimitrios and Black, Michael J.},
47
+ title = {DECO: Dense Estimation of 3D Human-Scene Contact In The Wild},
48
+ booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
49
+ month = {October},
50
+ year = {2023},
51
+ pages = {8001-8013}
52
+ }
53
+
54
+ Commercial licensing opportunities
55
+ For commercial uses of the Data & Software, please send emails to ps-license@tue.mpg.de
56
+
57
+ This Agreement shall be governed by the laws of the Federal Republic of Germany except for the UN Sales Convention.
README.md CHANGED
@@ -1,12 +1,145 @@
1
- ---
2
- title: DECO
3
- emoji: 🏢
4
- colorFrom: blue
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 3.47.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DECO: Dense Estimation of 3D Human-Scene Contact in the Wild [ICCV 2023 (Oral)]
2
+
3
+ > Code repository for the paper:
4
+ > [**DECO: Dense Estimation of 3D Human-Scene Contact in the Wild**](https://openaccess.thecvf.com/content/ICCV2023/html/Tripathi_DECO_Dense_Estimation_of_3D_Human-Scene_Contact_In_The_Wild_ICCV_2023_paper.html)
5
+ > [Shashank Tripathi](https://sha2nkt.github.io/), [Agniv Chatterjee](https://ac5113.github.io/), [Jean-Claude Passy](https://is.mpg.de/person/jpassy), [Hongwei Yi](https://xyyhw.top/), [Dimitrios Tzionas](https://ps.is.mpg.de/person/dtzionas), [Michael J. Black](https://ps.is.mpg.de/person/black)<br />
6
+ > *IEEE International Conference on Computer Vision (ICCV), 2023*
7
+
8
+ [![arXiv](https://img.shields.io/badge/arXiv-2309.15273-00ff00.svg)](https://arxiv.org/abs/2309.15273) [![Website shields.io](https://img.shields.io/website-up-down-green-red/http/shields.io.svg)](https://deco.is.tue.mpg.de/) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]() [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)]()
9
+
10
+ ![teaser](assets/teaser.png)
11
+
12
+ [[Project Page](https://deco.is.tue.mpg.de)] [[Paper](https://arxiv.org/abs/2309.15273)] [[Video](https://www.youtube.com/watch?v=o7MLobqAFTQ)] [[Poster](https://www.dropbox.com/scl/fi/kvhpfnkvga2pt19ayko8u/ICCV2023_DECO_Poster_v2.pptx?rlkey=ihbf3fi6u9j0ha9x1gfk2cwd0&dl=0)] [[License](https://deco.is.tue.mpg.de/license.html)] [[Contact](mailto:deco@tue.mpg.de)]
13
+
14
+ ## Installation and Setup
15
+ 1. First, clone the repo. Then, we recommend creating a clean [conda](https://docs.conda.io/) environment, activating it and installing torch and torchvision, as follows:
16
+ ```shell
17
+ git clone https://github.com/sha2nkt/deco.git
18
+ cd deco
19
+ conda create -n deco python=3.9 -y
20
+ conda activate deco
21
+ pip install torch==1.13.0+cu117 torchvision==0.14.0+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
22
+ ```
23
+ Please adjust the CUDA version as required.
24
+
25
+ 2. Install PyTorch3D from source. Users may also refer to [PyTorch3D-install](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md) for more details.
26
+ However, our tests show that installing using ``conda`` sometimes runs into dependency conflicts.
27
+ Hence, users may alternatively install Pytorch3D from source following the steps below.
28
+ ```shell
29
+ git clone https://github.com/facebookresearch/pytorch3d.git
30
+ cd pytorch3d
31
+ pip install .
32
+ cd ..
33
+ ```
34
+
35
+ 3. Install the other dependencies and download the required data.
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ sh fetch_data.sh
39
+ ```
40
+
41
+ 4. Please download [SMPL](https://smpl.is.tue.mpg.de/) (version 1.1.0) and [SMPL-X](https://smpl-x.is.tue.mpg.de/) (v1.1) files into the data folder. Please rename the SMPL files to ```SMPL_FEMALE.pkl```, ```SMPL_MALE.pkl``` and ```SMPL_NEUTRAL.pkl```. The directory structure for the ```data``` folder has been elaborated below:
42
+
43
+ ```
44
+ ├── preprocess
45
+ ├── smpl
46
+ │ ├── SMPL_FEMALE.pkl
47
+ │ ├── SMPL_MALE.pkl
48
+ │ ├── SMPL_NEUTRAL.pkl
49
+ │ ├── smpl_neutral_geodesic_dist.npy
50
+ │ ├── smpl_neutral_tpose.ply
51
+ │ ├── smplpix_vertex_colors.npy
52
+ ├── smplx
53
+ │ ├── SMPLX_FEMALE.npz
54
+ │ ├── SMPLX_FEMALE.pkl
55
+ │ ├── SMPLX_MALE.npz
56
+ │ ├── SMPLX_MALE.pkl
57
+ │ ├── SMPLX_NEUTRAL.npz
58
+ │ ├── SMPLX_NEUTRAL.pkl
59
+ │ ├── smplx_neutral_tpose.ply
60
+ ├── weights
61
+ │ ├── pose_hrnet_w32_256x192.pth
62
+ ├── J_regressor_extra.npy
63
+ ├── base_dataset.py
64
+ ├── mixed_dataset.py
65
+ ├── smpl_partSegmentation_mapping.pkl
66
+ ├── smpl_vert_segmentation.json
67
+ └── smplx_vert_segmentation.json
68
+ ```
69
+
70
+ ## Run demo on images
71
+ The following command will run DECO on all images in the specified `--img_src`, and save rendering and colored mesh in `--out_dir`. The `--model_path` flag is used to specify the specific checkpoint being used. Additionally, the base mesh color and the color of predicted contact annotation can be specified using the `--mesh_colour` and `--annot_colour` flags respectively.
72
+ ```bash
73
+ python inference.py \
74
+ --img_src example_images \
75
+ --out_dir demo_out
76
+ ```
77
+
78
+ ## Training and Evaluation
79
+
80
+ We release 3 versions of the DECO model:
81
+ <ol>
82
+ <li> DECO-HRNet (<em> Best performing model </em>) </li>
83
+ <li> DECO-HRNet w/o context branches </li>
84
+ <li> DECO-Swin </li>
85
+ </ol>
86
+
87
+ All the checkpoints have been downloaded to ```checkpoints```.
88
+ However, please note that versions 2 and 3 have been trained solely on the RICH dataset. <br>
89
+ We recommend using the first DECO version.
90
+
91
+ The dataset npz files have been downloaded to ```datasets/Release_Datasets```. Please download the actual DAMON data and place them in ```datasets``` following the instructions given.
92
+
93
+ ### Evaluation
94
+ To run evaluation on the DAMON dataset, please run the following command:
95
+
96
+ ```bash
97
+ python tester.py --cfg configs/cfg_test.yml
98
+ ```
99
+
100
+ ### Training
101
+ The config provided (```cfg_train.yml```) is set to train and evaluate on all three datasets: DAMON, RICH and PROX. To change this, please change the value of the key ```TRAINING.DATASETS``` and ```VALIDATION.DATASETS``` in the config (please also change ```TRAINING.DATASET_MIX_PDF``` as required). <br>
102
+ Also, the best checkpoint is stored by default at ```checkpoints/Other_Checkpoints```.
103
+ Please run the following command to start training of the DECO model:
104
+
105
+ ```bash
106
+ python train.py --cfg configs/cfg_train.yml
107
+ ```
108
+
109
+ ### Training on custom datasets
110
+
111
+ To train on other datasets, please follow these steps:
112
+ 1. Please create an npz of the dataset, following the structure of the datasets in ```datasets/Release_Datasets``` with the corresponding keys and values.
113
+ 2. Please create scene segmentation maps, if not available. We have used [Mask2Former](https://github.com/facebookresearch/Mask2Former) in our work.
114
+ 3. For creating the part segmentation maps, this [sample script](https://github.com/sha2nkt/deco/blob/main/scripts/datascripts/get_part_seg_mask.py) can be referred to.
115
+ 4. Add the dataset name(s) to ```train.py``` ([these lines](https://github.com/sha2nkt/deco/blob/d5233ecfad1f51b71a50a78c0751420067e82c02/train.py#L83)), ```tester.py``` ([these lines](https://github.com/sha2nkt/deco/blob/d5233ecfad1f51b71a50a78c0751420067e82c02/tester.py#L51)) and ```data/mixed_dataset.py``` ([these lines](https://github.com/sha2nkt/deco/blob/d5233ecfad1f51b71a50a78c0751420067e82c02/data/mixed_dataset.py#L17)), according to the body model being used (SMPL/SMPL-X)
116
+ 5. Add the path(s) to the dataset npz(s) to ```common/constants.py``` ([these lines](https://github.com/sha2nkt/deco/blob/d5233ecfad1f51b71a50a78c0751420067e82c02/common/constants.py#L19)).
117
+ 6. Finally, change ```TRAINING.DATASETS``` and ```VALIDATION.DATASETS``` in the config file and you're good to go!
118
+
119
+ ## Citing
120
+ If you find this code useful for your research, please consider citing the following paper:
121
+
122
+ ```bibtex
123
+ @InProceedings{Tripathi_2023_ICCV,
124
+ author = {Tripathi, Shashank and Chatterjee, Agniv and Passy, Jean-Claude and Yi, Hongwei and Tzionas, Dimitrios and Black, Michael J.},
125
+ title = {DECO: Dense Estimation of 3D Human-Scene Contact In The Wild},
126
+ booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
127
+ month = {October},
128
+ year = {2023},
129
+ pages = {8001-8013}
130
+ }
131
+ ```
132
+
133
+ ### License
134
+
135
+ See [LICENSE](LICENSE).
136
+
137
+ ### Acknowledgments
138
+
139
+ We sincerely thank Alpar Cseke for his contributions to DAMON data collection and PHOSA evaluations, Sai K. Dwivedi for facilitating PROX downstream experiments, Xianghui Xie for his generous help with CHORE evaluations, Lea Muller for her help in initiating the contact annotation tool, Chun-Hao P. Huang for RICH discussions and Yixin Chen for details about the HOT paper. We are grateful to Mengqin Xue and Zhenyu Lou for their collaboration in BEHAVE evaluations, Joachim Tesch and Nikos Athanasiou for insightful visualization advice, and Tsvetelina Alexiadis for valuable data collection guidance. Their invaluable contributions enriched this research significantly. We also thank Benjamin Pellkofer for help with the website and IT support. This work was funded by the International Max Planck Research School for Intelligent Systems (IMPRS-IS).
140
+
141
+ ### Contact
142
+
143
+ For technical questions, please create an issue. For other questions, please contact `deco@tue.mpg.de`.
144
+
145
+ For commercial licensing, please contact `ps-licensing@tue.mpg.de`.
app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
def greet(name):
    """Build the greeting shown by the demo interface.

    Args:
        name: Text to greet; must be a ``str``.

    Returns:
        The string ``"Hello <name>!!"``.
    """
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)
5
+
6
+ iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ iface.launch()
assets/overview.png ADDED
assets/teaser.png ADDED
common/constants.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from os.path import join
2
+
3
+ DIST_MATRIX_PATH = 'data/smpl/smpl_neutral_geodesic_dist.npy'
4
+ SMPL_MEAN_PARAMS = 'data/smpl_mean_params.npz'
5
+ SMPL_MODEL_DIR = 'data/smpl/'
6
+ SMPLX_MODEL_DIR = 'data/smplx/'
7
+
8
+ N_PARTS = 24
9
+
10
+ # Mean and standard deviation for normalizing input image
11
+ IMG_NORM_MEAN = [0.485, 0.456, 0.406]
12
+ IMG_NORM_STD = [0.229, 0.224, 0.225]
13
+
14
+ # Output folder to save test/train npz files
15
+ DATASET_NPZ_PATH = 'datasets/Release_Datasets'
16
+ CONTACT_MAPPING_PATH = 'data/conversions'
17
+
18
+ # Path to test/train npz files
19
+ DATASET_FILES = {
20
+ 'train': {
21
+ 'damon': join(DATASET_NPZ_PATH, 'damon/hot_dca_trainval.npz'),
22
+ 'rich': join(DATASET_NPZ_PATH, 'rich/rich_train_smplx_cropped_bmp.npz'),
23
+ 'prox': join(DATASET_NPZ_PATH, 'prox/prox_train_smplx_ds4.npz'),
24
+ },
25
+ 'val': {
26
+ 'damon': join(DATASET_NPZ_PATH, 'damon/hot_dca_test.npz'),
27
+ 'rich': join(DATASET_NPZ_PATH, 'rich/rich_test_smplx_cropped_bmp.npz'),
28
+ 'prox': join(DATASET_NPZ_PATH, 'prox/prox_val_smplx_ds4.npz'),
29
+ },
30
+ 'test': {
31
+ 'damon': join(DATASET_NPZ_PATH, 'damon/hot_dca_test.npz'),
32
+ 'rich': join(DATASET_NPZ_PATH, 'rich/rich_test_smplx_cropped_bmp.npz'),
33
+ 'prox': join(DATASET_NPZ_PATH, 'prox/prox_val_smplx_ds4.npz'),
34
+ },
35
+ }
common/renderer_pyrd.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
2
+
3
+ # This program is free software; you can redistribute it and/or modify it
4
+ # under the terms of the MIT license.
5
+
6
+ # This program is distributed in the hope that it will be useful, but WITHOUT ANY
7
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8
+ # PARTICULAR PURPOSE. See the MIT License for more details.
9
+
10
+ import os
11
+ import trimesh
12
+ import pyrender
13
+ import numpy as np
14
+ import colorsys
15
+ import cv2
16
+
17
+
18
class Renderer(object):
    """Thin wrapper around a pyrender offscreen renderer for body meshes.

    All meshes share the face topology given at construction time and are
    rendered with a pinhole camera placed at the origin.
    """

    def __init__(self, focal_length=600, img_w=512, img_h=512, faces=None,
                 same_mesh_color=False):
        """Create the offscreen renderer.

        Args:
            focal_length: Focal length used for both fx and fy.
            img_w: Viewport width in pixels.
            img_h: Viewport height in pixels.
            faces: Face indices passed to every ``trimesh.Trimesh`` that is built.
            same_mesh_color: If true, every mesh gets the same base colour;
                otherwise the hue depends on the mesh index.
        """
        # NOTE(review): this is set after pyrender has already been imported at
        # module level; it may need to be set before that import to take
        # effect -- confirm.
        os.environ['PYOPENGL_PLATFORM'] = 'egl'
        self.renderer = pyrender.OffscreenRenderer(
            viewport_width=img_w,
            viewport_height=img_h,
            point_size=1.0,
        )
        self.camera_center = [img_w // 2, img_h // 2]
        self.focal_length = focal_length
        self.faces = faces
        self.same_mesh_color = same_mesh_color

    def render_front_view(self, verts, bg_img_rgb=None, bg_color=(0, 0, 0, 0), vertex_colors=None, render_part_seg=False, part_label_bins=None):
        """Render all meshes in ``verts`` from the camera at the origin.

        Args:
            verts: Sequence of per-person vertex arrays; one mesh is built per entry.
            bg_img_rgb: Optional background image. When given, rendered pixels
                are written into it in place and it is returned.
            bg_color: Scene background colour.
            vertex_colors: Optional per-vertex colours applied to every mesh.
            render_part_seg: If true, render with full ambient light and no
                directional lights, and return a part-label map.
            part_label_bins: Bin edges passed to ``np.digitize``; used only
                when ``render_part_seg`` is true.

        Returns:
            ``(body_parts, color_rgb)`` when ``render_part_seg`` is true,
            otherwise the rendered RGB image (or ``bg_img_rgb`` with the
            rendering pasted in, when a background was supplied).
        """
        # One scene per call; ambient light is fully on only for part-seg rendering.
        ambient = np.ones(3) * (1 if render_part_seg else 0)
        scene = pyrender.Scene(bg_color=bg_color, ambient_light=ambient)

        # The camera sits at the origin (identity pose).
        center_x, center_y = self.camera_center[0], self.camera_center[1]
        camera = pyrender.camera.IntrinsicsCamera(fx=self.focal_length, fy=self.focal_length,
                                                  cx=center_x, cy=center_y)
        scene.add(camera, pose=np.eye(4))

        if not render_part_seg:
            # The same directional light is added twice with different
            # orientations; for a DirectionalLight only the rotation matters.
            light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=3.0)
            for angle_deg, axis in ((-45, [1, 0, 0]), (45, [0, 1, 0])):
                light_pose = trimesh.transformations.rotation_matrix(np.radians(angle_deg), axis)
                scene.add(light, pose=light_pose)

        # Every mesh is rotated by 180 degrees about the x-axis before rendering.
        flip = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
        num_people = len(verts)

        for person_idx in range(num_people):
            body = trimesh.Trimesh(verts[person_idx], self.faces, process=False)
            body.apply_transform(flip)

            # Fixed hue for everyone, or a hue spread over the people in the scene.
            hue = 0.6 if self.same_mesh_color else float(person_idx) / num_people
            mesh_color = colorsys.hsv_to_rgb(hue, 0.5, 1.0)
            material = pyrender.MetallicRoughnessMaterial(
                metallicFactor=1.0,
                alphaMode='OPAQUE',
                baseColorFactor=mesh_color)

            if vertex_colors is not None:
                # colour individual vertices (e.g. by part label)
                body.visual.vertex_colors = vertex_colors
            render_mesh = pyrender.Mesh.from_trimesh(body, material=material, wireframe=False)
            scene.add(render_mesh, 'mesh')

        # The alpha channel was reported as unreliable, so opacity is derived
        # from the depth image (depth > 0) instead.
        color_rgba, depth_map = self.renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
        color_rgb = color_rgba[:, :, :3]

        if render_part_seg:
            # Collapse RGB to one channel, then map pixel values to bucket indices.
            single_channel = color_rgb.copy().max(-1)
            body_parts = np.digitize(single_channel, part_label_bins, right=True)
            # Shift the bucket indices up by one so that 0 is free for
            # background, then zero out every pixel not covered by a mesh.
            body_parts = body_parts + 1
            covered = depth_map > 0
            body_parts = body_parts * covered
            return body_parts, color_rgb

        if bg_img_rgb is None:
            return color_rgb

        # Paste rendered pixels onto the supplied background (modified in place).
        covered = depth_map > 0
        bg_img_rgb[covered] = color_rgb[covered]
        return bg_img_rgb

    def render_side_view(self, verts):
        """Render the meshes after a 90 degree rotation about the y-axis.

        Args:
            verts: Vertex array; the original comment gives n*6890*3.

        Returns:
            Whatever ``render_front_view`` returns for the rotated vertices.
        """
        pivot = verts.mean(axis=(0, 1))  # n*6890*3 -> 3
        # Zero the X and Y of the pivot so only its Z offset is used.
        pivot[:2] = 0
        rot_y = cv2.Rodrigues(np.array([0, np.radians(90.), 0]))[0][np.newaxis, ...]  # 1*3*3
        rotated = np.matmul((verts - pivot), rot_y) + pivot
        return self.render_front_view(rotated)

    def delete(self):
        """
        Release the offscreen renderer; needed before creating the renderer next time.
        """
        self.renderer.delete()
configs/cfg_test.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EXP_NAME: 'damon_hrnet_testing'
2
+ PROJECT_NAME: 'DECO_DAMON_Testing'
3
+ OUTPUT_DIR: 'deco_results'
4
+ CONDOR_DIR: ''
5
+ DATASET:
6
+ BATCH_SIZE: 16
7
+ NUM_WORKERS: 4
8
+ NORMALIZE_IMAGES: [True]
9
+ OPTIMIZER:
10
+ TYPE: 'adam'
11
+ LR: [5e-5]
12
+ NUM_UPDATE_LR: 3
13
+ TRAINING:
14
+ ENCODER: 'hrnet'
15
+ CONTEXT: [True]
16
+ NUM_EPOCHS: 1
17
+ NUM_EARLY_STOP: 10
18
+ SUMMARY_STEPS: 5
19
+ CHECKPOINT_EPOCHS: 5
20
+ DATASETS: ['damon']
21
+ DATASET_MIX_PDF: ['1.']
22
+ DATASET_ROOT_PATH: ''
23
+ BEST_MODEL_PATH: './checkpoints/Release_Checkpoint/deco_best.pth'
24
+ PAL_LOSS_WEIGHTS: 0.0
25
+ VALIDATION:
26
+ SUMMARY_STEPS: 1000
27
+ DATASETS: ['damon']
28
+ MAIN_DATASET: 'damon'
configs/cfg_train.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EXP_NAME: 'demo_train'
2
+ PROJECT_NAME: 'DECO_demo_training'
3
+ OUTPUT_DIR: 'deco_results'
4
+ CONDOR_DIR: ''
5
+ DATASET:
6
+ BATCH_SIZE: 4
7
+ NUM_WORKERS: 8
8
+ NORMALIZE_IMAGES: [True]
9
+ OPTIMIZER:
10
+ TYPE: 'adam'
11
+ LR: [1e-5]
12
+ NUM_UPDATE_LR: 3
13
+ TRAINING:
14
+ ENCODER: 'hrnet'
15
+ CONTEXT: [True]
16
+ NUM_EPOCHS: 100
17
+ NUM_EARLY_STOP: 10
18
+ SUMMARY_STEPS: 5
19
+ CHECKPOINT_EPOCHS: 5
20
+ DATASETS: ['damon', 'rich', 'prox']
21
+ DATASET_MIX_PDF: ['0.4', '0.3', '0.3'] # should sum to 1.0 unless you want to weight by dataset size
22
+ DATASET_ROOT_PATH: ''
23
+ BEST_MODEL_PATH: './checkpoints/Other_Checkpoints/demo_train.pth'
24
+ LOSS_WEIGHTS: 1.
25
+ PAL_LOSS_WEIGHTS: 0.01
26
+ VALIDATION:
27
+ SUMMARY_STEPS: 5
28
+ DATASETS: ['damon', 'rich', 'prox']
29
+ MAIN_DATASET: 'damon'
data/base_dataset.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import cv2
3
+ import numpy as np
4
+ from torch.utils.data import Dataset
5
+ from torchvision.transforms import Normalize
6
+ from common import constants
7
+
8
def mask_split(img, num_parts):
    """Expand a label image into one binary channel per label.

    Args:
        img: Label image; if it is not 2-D, only channel 0 is used.
        num_parts: Number of output channels.

    Returns:
        Float array of shape ``(num_parts, H, W)`` where channel ``i`` is 1.0
        at the pixels whose label equals ``i`` and 0.0 elsewhere.
    """
    labels = img if len(img.shape) == 2 else img[:, :, 0]
    height, width = labels.shape[0], labels.shape[1]
    channels = np.zeros((height, width, num_parts))
    # Only labels that actually occur need a channel filled in.
    for label in np.unique(labels):
        channels[:, :, label] = np.where(labels == label, 1., 0.)
    return np.transpose(channels, (2, 0, 1))
15
+
16
class BaseDataset(Dataset):
    """Dataset backed by a single preprocessed ``.npz`` file.

    The npz path is looked up in ``constants.DATASET_FILES[mode][dataset]``.
    Each item bundles the resized image, the scene/part segmentation masks,
    body-model parameters and contact labels, together with ``has_*`` flags
    telling which optional annotations are present for that sample.
    """

    def __init__(self, dataset, mode, model_type='smpl', normalize=False):
        """Load the npz arrays and derive the availability flags.

        Args:
            dataset: Dataset key inside ``constants.DATASET_FILES[mode]``.
            mode: Split key of ``constants.DATASET_FILES``.
            model_type: ``'smpl'`` (6890 vertices) or ``'smplx'`` (10475 vertices).
            normalize: If true, images are normalized with
                ``constants.IMG_NORM_MEAN`` / ``constants.IMG_NORM_STD``.

        Raises:
            NotImplementedError: If ``model_type`` is neither ``'smpl'`` nor ``'smplx'``.
        """
        self.dataset = dataset
        self.mode = mode

        print(f'Loading dataset: {constants.DATASET_FILES[mode][dataset]} for mode: {mode}')

        # NOTE(review): allow_pickle=True unpickles objects stored in the npz;
        # only load files from a trusted source.
        self.data = np.load(constants.DATASET_FILES[mode][dataset], allow_pickle=True)

        self.images = self.data['imgname']

        # get 3d contact labels, if available
        try:
            self.contact_labels_3d = self.data['contact_label']
            # has_contact_3d is 1 where the contact label is not empty and 0 otherwise
            self.has_contact_3d = np.array([1 if len(x) > 0 else 0 for x in self.contact_labels_3d])
        except KeyError:
            self.has_contact_3d = np.zeros(len(self.images))

        # get 2d polygon contact labels, if available
        try:
            self.polygon_contacts_2d = self.data['polygon_2d_contact']
            self.has_polygon_contact_2d = np.ones(len(self.images))
        except KeyError:
            self.has_polygon_contact_2d = np.zeros(len(self.images))

        # Get camera parameters - only intrinsics for now
        try:
            self.cam_k = self.data['cam_k']
        except KeyError:
            self.cam_k = np.zeros((len(self.images), 3, 3))

        self.sem_masks = self.data['scene_seg']
        self.part_masks = self.data['part_seg']

        # Get gt SMPL parameters, if available
        try:
            self.pose = self.data['pose'].astype(float)
            self.betas = self.data['shape'].astype(float)
            self.transl = self.data['transl'].astype(float)
            if 'has_smpl' in self.data:
                self.has_smpl = self.data['has_smpl']
            else:
                self.has_smpl = np.ones(len(self.images))
            self.is_smplx = np.ones(len(self.images)) if model_type == 'smplx' else np.zeros(len(self.images))
        except KeyError:
            self.has_smpl = np.zeros(len(self.images))
            self.is_smplx = np.zeros(len(self.images))

        if model_type == 'smpl':
            self.n_vertices = 6890
        elif model_type == 'smplx':
            self.n_vertices = 10475
        else:
            raise NotImplementedError

        self.normalize = normalize
        self.normalize_img = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD)

    @staticmethod
    def _read_image(path, description):
        """Read ``path`` with ``cv2.imread`` and fail loudly if that did not work.

        ``cv2.imread`` returns ``None`` instead of raising when a file is
        missing or cannot be decoded; turning that into an exception here
        keeps the failing path in the error instead of an unrelated
        ``UnboundLocalError`` later in ``__getitem__``.

        Raises:
            OSError: If the image could not be read.
        """
        image = cv2.imread(path)
        if image is None:
            raise OSError(f'Could not read {description} image: {path}')
        return image

    def __getitem__(self, index):
        """Assemble the sample at ``index``.

        Returns:
            dict with the tensors ``img``, ``pose``, ``betas``, ``transl``,
            ``img_scale_factor``, ``contact_label_3d``, ``sem_mask``,
            ``part_mask`` and ``polygon_contact_2d``, plus ``img_path``,
            ``cam_k`` and the flags ``has_smpl``, ``is_smplx``,
            ``has_contact_3d`` and ``has_polygon_contact_2d``.

        Raises:
            OSError: If the image or one of its masks cannot be read.
        """
        item = {}

        # NOTE(review): in every cv2.resize call below the interpolation flag is
        # passed as the third positional argument, which is `dst` in the OpenCV
        # Python signature, so the flag is presumably ignored and the default
        # interpolation is used -- confirm. The calls are kept as they were,
        # because switching to `interpolation=` changes the pixel values the
        # released checkpoints were trained on.

        # Load image
        img_path = self.images[index]
        img = self._read_image(img_path, 'input')
        img_h, img_w, _ = img.shape
        img = cv2.resize(img, (256, 256), cv2.INTER_CUBIC)
        img = img.transpose(2, 0, 1) / 255.0

        # per-axis factor from original resolution to the 256x256 network input
        img_scale_factor = np.array([256 / img_w, 256 / img_h])

        # Get SMPL parameters, if available
        if self.has_smpl[index]:
            pose = self.pose[index].copy()
            betas = self.betas[index].copy()
            transl = self.transl[index].copy()
        else:
            pose = np.zeros(72)
            betas = np.zeros(10)
            transl = np.zeros(3)

        # Load vertex_contact
        if self.has_contact_3d[index]:
            contact_label_3d = self.contact_labels_3d[index]
        else:
            contact_label_3d = np.zeros(self.n_vertices)

        # Scene segmentation, expanded to 133 one-hot channels
        sem_mask_path = self.sem_masks[index]
        sem_mask = self._read_image(sem_mask_path, 'scene segmentation')
        sem_mask = cv2.resize(sem_mask, (256, 256), cv2.INTER_CUBIC)
        sem_mask = mask_split(sem_mask, 133)

        # Part segmentation, expanded to 26 one-hot channels
        part_mask_path = self.part_masks[index]
        part_mask = self._read_image(part_mask_path, 'part segmentation')
        part_mask = cv2.resize(part_mask, (256, 256), cv2.INTER_CUBIC)
        part_mask = mask_split(part_mask, 26)

        if self.has_polygon_contact_2d[index]:
            polygon_contact_2d_path = self.polygon_contacts_2d[index]
            polygon_contact_2d = self._read_image(polygon_contact_2d_path, '2D polygon contact')
            polygon_contact_2d = cv2.resize(polygon_contact_2d, (256, 256), cv2.INTER_NEAREST)
            # binarize the polygon contact mask
            polygon_contact_2d = np.where(polygon_contact_2d > 0, 1, 0)
        else:
            polygon_contact_2d = np.zeros((256, 256, 3))

        if self.normalize:
            img = torch.tensor(img, dtype=torch.float32)
            item['img'] = self.normalize_img(img)
        else:
            item['img'] = torch.tensor(img, dtype=torch.float32)

        if self.is_smplx[index]:
            # Add 6 zeros to the end of the pose vector to match with smpl
            pose = np.concatenate((pose, np.zeros(6)))

        item['img_path'] = img_path
        item['pose'] = torch.tensor(pose, dtype=torch.float32)
        item['betas'] = torch.tensor(betas, dtype=torch.float32)
        item['transl'] = torch.tensor(transl, dtype=torch.float32)
        item['cam_k'] = self.cam_k[index]
        item['img_scale_factor'] = torch.tensor(img_scale_factor, dtype=torch.float32)
        item['contact_label_3d'] = torch.tensor(contact_label_3d, dtype=torch.float32)
        item['sem_mask'] = torch.tensor(sem_mask, dtype=torch.float32)
        item['part_mask'] = torch.tensor(part_mask, dtype=torch.float32)
        item['polygon_contact_2d'] = torch.tensor(polygon_contact_2d, dtype=torch.float32)

        item['has_smpl'] = self.has_smpl[index]
        item['is_smplx'] = self.is_smplx[index]
        item['has_contact_3d'] = self.has_contact_3d[index]
        item['has_polygon_contact_2d'] = self.has_polygon_contact_2d[index]

        return item

    def __len__(self):
        """Return the number of samples (entries of ``imgname``)."""
        return len(self.images)
data/mixed_dataset.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This file contains the definition of different heterogeneous datasets used for training
3
+ """
4
+ import torch
5
+ import numpy as np
6
+
7
+ from .base_dataset import BaseDataset
8
+
9
class MixedDataset(torch.utils.data.Dataset):
    """Sample from several datasets according to a mixing distribution.

    Each ``__getitem__`` call draws a random number, uses it to pick one of
    the wrapped datasets (weighted by ``dataset_mix_pdf``) and returns that
    dataset's element at ``index`` modulo its length. ``len()`` reports the
    size of the largest wrapped dataset.
    """

    # Body-model type passed to BaseDataset for each supported dataset name.
    _MODEL_TYPES = {'rich': 'smplx', 'prox': 'smplx', 'damon': 'smpl'}

    def __init__(self, ds_list, mode, dataset_mix_pdf, **kwargs):
        """Build one BaseDataset per name in ``ds_list`` and the sampling CDF.

        Args:
            ds_list: dataset names; 'rich', 'prox' and 'damon' are supported.
            mode: forwarded unchanged to every BaseDataset.
            dataset_mix_pdf: one weight per dataset (numbers or numeric
                strings); the weights must sum to 1.0.
            **kwargs: forwarded unchanged to every BaseDataset.

        Raises:
            ValueError: if a name in ``ds_list`` is not supported.
            AssertionError: if the number of weights differs from the number
                of datasets, or the weights do not sum to 1.0.
        """
        self.dataset_list = ds_list
        print('Training Dataset list: ', self.dataset_list)
        self.num_datasets = len(self.dataset_list)

        self.datasets = []
        for ds in self.dataset_list:
            if ds not in self._MODEL_TYPES:
                raise ValueError(f'Dataset not supported: {ds}')
            self.datasets.append(
                BaseDataset(ds, mode, model_type=self._MODEL_TYPES[ds], **kwargs)
            )

        # One "epoch" covers the largest dataset; smaller ones wrap around.
        self.length = max(len(ds) for ds in self.datasets)

        self.partition = self._cumulative_partition(dataset_mix_pdf, self.num_datasets)

    @staticmethod
    def _cumulative_partition(dataset_mix_pdf, num_datasets):
        """Convert mixing weights into a cumulative distribution array."""
        # convert list of strings to list of floats
        weights = [float(w) for w in dataset_mix_pdf]
        assert len(weights) == num_datasets, "Number of partitions must be equal to number of datasets"
        # Compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 is not exactly 1.0
        # in floating point and must not be rejected.
        assert np.isclose(sum(weights), 1.0), "Dataset Mix PDF must sum to 1.0 unless you want to weight by dataset size"
        cdf = np.array(weights).cumsum()
        # Pin the last edge so a draw in [0, 1) always lands in some bucket.
        cdf[-1] = 1.0
        return cdf

    def __getitem__(self, index):
        """Return element ``index`` (wrapped) from a randomly chosen dataset."""
        p = np.random.rand()
        # First bucket whose cumulative weight is >= p.
        ds_idx = int(np.searchsorted(self.partition, p, side='left'))
        # Clamp so rounding in the CDF can never leave the draw without a
        # bucket (which previously made this method return None).
        ds_idx = min(ds_idx, self.num_datasets - 1)
        dataset = self.datasets[ds_idx]
        return dataset[index % len(dataset)]

    def __len__(self):
        """Return the length of the largest wrapped dataset."""
        return self.length
data/preprocess/behave-30fps-error_frames.json ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "error_seqs": [
3
+ "Date01_Sub01_boxsmall_hand",
4
+ "Date01_Sub01_boxtiny_hand",
5
+ "Date02_Sub02_backpack_back",
6
+ "Date02_Sub02_backpack_hand",
7
+ "Date02_Sub02_backpack_twohand",
8
+ "Date02_Sub02_boxtiny_hand",
9
+ "Date02_Sub02_monitor_move2",
10
+ "Date02_Sub02_plasticcontainer",
11
+ "Date02_Sub02_toolbox",
12
+ "Date02_Sub02_toolbox_part2",
13
+ "Date02_Sub02_yogamat",
14
+ "Date03_Sub03_boxlong",
15
+ "Date03_Sub03_plasticcontainer",
16
+ "Date03_Sub04_boxsmall",
17
+ "Date03_Sub04_boxtiny",
18
+ "Date03_Sub04_boxtiny_part2",
19
+ "Date03_Sub04_plasticcontainer_lift",
20
+ "Date03_Sub04_toolbox",
21
+ "Date03_Sub05_boxsmall",
22
+ "Date03_Sub05_boxtiny",
23
+ "Date04_Sub05_boxlarge",
24
+ "Date04_Sub05_boxlong",
25
+ "Date04_Sub05_boxtiny",
26
+ "Date04_Sub05_stool",
27
+ "Date05_Sub06_boxlong",
28
+ "Date05_Sub06_boxtiny",
29
+ "Date05_Sub06_toolbox",
30
+ "Date06_Sub07_boxlong",
31
+ "Date06_Sub07_boxtiny",
32
+ "Date06_Sub07_stool_sit",
33
+ "Date06_Sub07_suitcase_lift",
34
+ "Date06_Sub07_toolbox",
35
+ "Date06_Sub07_yogamat",
36
+ "Date07_Sub04_boxlong",
37
+ "Date07_Sub04_boxsmall",
38
+ "Date07_Sub04_boxtiny",
39
+ "Date07_Sub04_plasticcontainer",
40
+ "Date07_Sub04_suitcase_lift",
41
+ "Date07_Sub05_suitcase_open",
42
+ "Date07_Sub05_tablesmall",
43
+ "Date07_Sub08_boxtiny",
44
+ "Date07_Sub08_yogamat"
45
+ ],
46
+ "good_quality": [
47
+ "Date01_Sub01_backpack_back",
48
+ "Date01_Sub01_backpack_hand",
49
+ "Date01_Sub01_backpack_hug",
50
+ "Date01_Sub01_boxlarge_hand",
51
+ "Date01_Sub01_boxlong_hand",
52
+ "Date01_Sub01_boxmedium_hand",
53
+ "Date01_Sub01_chairblack_hand",
54
+ "Date01_Sub01_chairblack_lift",
55
+ "Date01_Sub01_chairblack_sit",
56
+ "Date01_Sub01_chairwood_hand",
57
+ "Date01_Sub01_chairwood_lift",
58
+ "Date01_Sub01_chairwood_sit",
59
+ "Date01_Sub01_monitor_hand",
60
+ "Date01_Sub01_monitor_move",
61
+ "Date01_Sub01_plasticcontainer",
62
+ "Date01_Sub01_stool_move",
63
+ "Date01_Sub01_stool_sit",
64
+ "Date01_Sub01_suitcase",
65
+ "Date01_Sub01_suitcase_lift",
66
+ "Date01_Sub01_tablesmall_lean",
67
+ "Date01_Sub01_tablesmall_lift",
68
+ "Date01_Sub01_tablesmall_move",
69
+ "Date01_Sub01_tablesquare_hand",
70
+ "Date01_Sub01_tablesquare_lift",
71
+ "Date01_Sub01_tablesquare_sit",
72
+ "Date01_Sub01_toolbox",
73
+ "Date01_Sub01_trashbin",
74
+ "Date01_Sub01_yogaball",
75
+ "Date01_Sub01_yogaball_play",
76
+ "Date01_Sub01_yogamat_hand",
77
+ "Date02_Sub02_boxlarge_hand",
78
+ "Date02_Sub02_boxlong_hand",
79
+ "Date02_Sub02_boxmedium_hand",
80
+ "Date02_Sub02_boxsmall_hand",
81
+ "Date02_Sub02_chairblack_hand",
82
+ "Date02_Sub02_chairblack_lift",
83
+ "Date02_Sub02_chairblack_sit",
84
+ "Date02_Sub02_chairwood_hand",
85
+ "Date02_Sub02_chairwood_sit",
86
+ "Date02_Sub02_monitor_hand",
87
+ "Date02_Sub02_monitor_move",
88
+ "Date02_Sub02_stool_move",
89
+ "Date02_Sub02_stool_sit",
90
+ "Date02_Sub02_suitcase_ground",
91
+ "Date02_Sub02_suitcase_lift",
92
+ "Date02_Sub02_tablesmall_lean",
93
+ "Date02_Sub02_tablesmall_lift",
94
+ "Date02_Sub02_tablesmall_move",
95
+ "Date02_Sub02_tablesquare_lift",
96
+ "Date02_Sub02_tablesquare_move",
97
+ "Date02_Sub02_tablesquare_sit",
98
+ "Date02_Sub02_trashbin",
99
+ "Date02_Sub02_yogaball_play",
100
+ "Date02_Sub02_yogaball_sit",
101
+ "Date03_Sub03_backpack_back",
102
+ "Date03_Sub03_backpack_hand",
103
+ "Date03_Sub03_backpack_hug",
104
+ "Date03_Sub03_boxlarge",
105
+ "Date03_Sub03_boxmedium",
106
+ "Date03_Sub03_boxsmall",
107
+ "Date03_Sub03_boxtiny",
108
+ "Date03_Sub03_chairblack_hand",
109
+ "Date03_Sub03_chairblack_lift",
110
+ "Date03_Sub03_chairblack_sit",
111
+ "Date03_Sub03_chairblack_sitstand",
112
+ "Date03_Sub03_chairwood_hand",
113
+ "Date03_Sub03_chairwood_lift",
114
+ "Date03_Sub03_chairwood_sit",
115
+ "Date03_Sub03_monitor_move",
116
+ "Date03_Sub03_stool_lift",
117
+ "Date03_Sub03_stool_sit",
118
+ "Date03_Sub03_suitcase_lift",
119
+ "Date03_Sub03_suitcase_move",
120
+ "Date03_Sub03_tablesmall_lean",
121
+ "Date03_Sub03_tablesmall_lift",
122
+ "Date03_Sub03_tablesmall_move",
123
+ "Date03_Sub03_tablesquare_lift",
124
+ "Date03_Sub03_tablesquare_move",
125
+ "Date03_Sub03_tablesquare_sit",
126
+ "Date03_Sub03_toolbox",
127
+ "Date03_Sub03_trashbin",
128
+ "Date03_Sub03_yogaball_play",
129
+ "Date03_Sub03_yogaball_sit",
130
+ "Date03_Sub03_yogamat",
131
+ "Date03_Sub04_backpack_back",
132
+ "Date03_Sub04_backpack_hand",
133
+ "Date03_Sub04_backpack_hug",
134
+ "Date03_Sub04_boxlarge",
135
+ "Date03_Sub04_boxlong",
136
+ "Date03_Sub04_boxmedium",
137
+ "Date03_Sub04_chairblack_hand",
138
+ "Date03_Sub04_chairblack_liftreal",
139
+ "Date03_Sub04_chairblack_sit",
140
+ "Date03_Sub04_chairwood_hand",
141
+ "Date03_Sub04_chairwood_lift",
142
+ "Date03_Sub04_chairwood_sit",
143
+ "Date03_Sub04_monitor_hand",
144
+ "Date03_Sub04_monitor_move",
145
+ "Date03_Sub04_stool_move",
146
+ "Date03_Sub04_stool_sit",
147
+ "Date03_Sub04_suitcase_ground",
148
+ "Date03_Sub04_suitcase_lift",
149
+ "Date03_Sub04_tablesmall_hand",
150
+ "Date03_Sub04_tablesmall_lean",
151
+ "Date03_Sub04_tablesmall_lift",
152
+ "Date03_Sub04_tablesquare_hand",
153
+ "Date03_Sub04_tablesquare_lift",
154
+ "Date03_Sub04_tablesquare_sit",
155
+ "Date03_Sub04_trashbin",
156
+ "Date03_Sub04_yogaball_play",
157
+ "Date03_Sub04_yogaball_play2",
158
+ "Date03_Sub04_yogaball_sit",
159
+ "Date03_Sub04_yogamat",
160
+ "Date03_Sub05_backpack",
161
+ "Date03_Sub05_boxlarge",
162
+ "Date03_Sub05_boxlong",
163
+ "Date03_Sub05_boxmedium",
164
+ "Date03_Sub05_chairblack",
165
+ "Date03_Sub05_chairwood",
166
+ "Date03_Sub05_chairwood_part2",
167
+ "Date03_Sub05_monitor",
168
+ "Date03_Sub05_plasticcontainer",
169
+ "Date03_Sub05_stool",
170
+ "Date03_Sub05_suitcase",
171
+ "Date03_Sub05_tablesmall",
172
+ "Date03_Sub05_tablesquare",
173
+ "Date03_Sub05_toolbox",
174
+ "Date03_Sub05_trashbin",
175
+ "Date03_Sub05_yogaball",
176
+ "Date03_Sub05_yogamat",
177
+ "Date04_Sub05_backpack",
178
+ "Date04_Sub05_boxmedium",
179
+ "Date04_Sub05_boxsmall",
180
+ "Date04_Sub05_chairblack",
181
+ "Date04_Sub05_chairwood",
182
+ "Date04_Sub05_monitor",
183
+ "Date04_Sub05_monitor_part2",
184
+ "Date04_Sub05_monitor_sit",
185
+ "Date04_Sub05_plasticcontainer",
186
+ "Date04_Sub05_suitcase",
187
+ "Date04_Sub05_suitcase_open",
188
+ "Date04_Sub05_tablesmall",
189
+ "Date04_Sub05_tablesquare",
190
+ "Date04_Sub05_toolbox",
191
+ "Date04_Sub05_trashbin",
192
+ "Date04_Sub05_yogaball",
193
+ "Date04_Sub05_yogamat",
194
+ "Date05_Sub05_backpack",
195
+ "Date05_Sub05_chairblack",
196
+ "Date05_Sub05_chairwood",
197
+ "Date05_Sub05_yogaball",
198
+ "Date05_Sub06_backpack_back",
199
+ "Date05_Sub06_backpack_hand",
200
+ "Date05_Sub06_backpack_twohand",
201
+ "Date05_Sub06_boxlarge",
202
+ "Date05_Sub06_boxmedium",
203
+ "Date05_Sub06_boxsmall",
204
+ "Date05_Sub06_chairblack_hand",
205
+ "Date05_Sub06_chairblack_lift",
206
+ "Date05_Sub06_chairblack_sit",
207
+ "Date05_Sub06_chairwood_hand",
208
+ "Date05_Sub06_chairwood_lift",
209
+ "Date05_Sub06_chairwood_sit",
210
+ "Date05_Sub06_monitor_hand",
211
+ "Date05_Sub06_monitor_move",
212
+ "Date05_Sub06_plasticcontainer",
213
+ "Date05_Sub06_stool_lift",
214
+ "Date05_Sub06_stool_sit",
215
+ "Date05_Sub06_suitcase_hand",
216
+ "Date05_Sub06_suitcase_lift",
217
+ "Date05_Sub06_tablesmall_hand",
218
+ "Date05_Sub06_tablesmall_lean",
219
+ "Date05_Sub06_tablesmall_lift",
220
+ "Date05_Sub06_tablesquare_lift",
221
+ "Date05_Sub06_tablesquare_move",
222
+ "Date05_Sub06_tablesquare_sit",
223
+ "Date05_Sub06_trashbin",
224
+ "Date05_Sub06_yogaball_play",
225
+ "Date05_Sub06_yogaball_sit",
226
+ "Date05_Sub06_yogamat",
227
+ "Date06_Sub07_backpack_back",
228
+ "Date06_Sub07_backpack_hand",
229
+ "Date06_Sub07_backpack_twohand",
230
+ "Date06_Sub07_boxlarge",
231
+ "Date06_Sub07_boxmedium",
232
+ "Date06_Sub07_boxsmall",
233
+ "Date06_Sub07_chairblack_hand",
234
+ "Date06_Sub07_chairblack_lift",
235
+ "Date06_Sub07_chairblack_sit",
236
+ "Date06_Sub07_chairwood_hand",
237
+ "Date06_Sub07_chairwood_lift",
238
+ "Date06_Sub07_chairwood_sit",
239
+ "Date06_Sub07_monitor_move",
240
+ "Date06_Sub07_plasticcontainer",
241
+ "Date06_Sub07_stool_lift",
242
+ "Date06_Sub07_suitcase_move",
243
+ "Date06_Sub07_tablesmall_lean",
244
+ "Date06_Sub07_tablesmall_lift",
245
+ "Date06_Sub07_tablesmall_move",
246
+ "Date06_Sub07_tablesquare_lift",
247
+ "Date06_Sub07_tablesquare_move",
248
+ "Date06_Sub07_tablesquare_sit",
249
+ "Date06_Sub07_trashbin",
250
+ "Date06_Sub07_yogaball_play",
251
+ "Date06_Sub07_yogaball_sit",
252
+ "Date07_Sub04_backpack_back",
253
+ "Date07_Sub04_backpack_hand",
254
+ "Date07_Sub04_backpack_twohand",
255
+ "Date07_Sub04_boxlarge",
256
+ "Date07_Sub04_boxmedium",
257
+ "Date07_Sub04_chairblack_hand",
258
+ "Date07_Sub04_chairblack_lift",
259
+ "Date07_Sub04_chairblack_sit",
260
+ "Date07_Sub04_chairwood_hand",
261
+ "Date07_Sub04_chairwood_lift",
262
+ "Date07_Sub04_chairwood_sit",
263
+ "Date07_Sub04_monitor_hand",
264
+ "Date07_Sub04_monitor_move",
265
+ "Date07_Sub04_stool_lift",
266
+ "Date07_Sub04_stool_sit",
267
+ "Date07_Sub04_suitcase_open",
268
+ "Date07_Sub04_tablesmall_lean",
269
+ "Date07_Sub04_tablesmall_lift",
270
+ "Date07_Sub04_tablesmall_move",
271
+ "Date07_Sub04_tablesquare_lift",
272
+ "Date07_Sub04_tablesquare_move",
273
+ "Date07_Sub04_tablesquare_sit",
274
+ "Date07_Sub04_toolbox_lift",
275
+ "Date07_Sub04_trashbin",
276
+ "Date07_Sub04_yogaball_play",
277
+ "Date07_Sub04_yogaball_sit",
278
+ "Date07_Sub04_yogamat",
279
+ "Date07_Sub05_suitcase_lift",
280
+ "Date07_Sub05_tablesquare",
281
+ "Date07_Sub08_backpack_back",
282
+ "Date07_Sub08_backpack_hand",
283
+ "Date07_Sub08_backpack_hug",
284
+ "Date07_Sub08_boxlarge",
285
+ "Date07_Sub08_boxlong",
286
+ "Date07_Sub08_boxmedium",
287
+ "Date07_Sub08_boxsmall",
288
+ "Date07_Sub08_chairblack_hand",
289
+ "Date07_Sub08_chairblack_lift",
290
+ "Date07_Sub08_chairblack_sit",
291
+ "Date07_Sub08_chairwood_hand",
292
+ "Date07_Sub08_chairwood_lift",
293
+ "Date07_Sub08_chairwood_sit",
294
+ "Date07_Sub08_monitor_hand",
295
+ "Date07_Sub08_monitor_move",
296
+ "Date07_Sub08_plasticcontainer",
297
+ "Date07_Sub08_stool",
298
+ "Date07_Sub08_suitcase",
299
+ "Date07_Sub08_tablesmall",
300
+ "Date07_Sub08_tablesquare",
301
+ "Date07_Sub08_toolbox",
302
+ "Date07_Sub08_trashbin",
303
+ "Date07_Sub08_yogaball"
304
+ ],
305
+ "error_frames": {
306
+ "Date01_Sub01_boxsmall_hand": [
307
+ "t0008.467"
308
+ ],
309
+ "Date01_Sub01_boxtiny_hand": [
310
+ "t0045.000"
311
+ ],
312
+ "Date02_Sub02_backpack_back": [
313
+ "t0035.200"
314
+ ],
315
+ "Date02_Sub02_backpack_hand": [
316
+ "t0031.800"
317
+ ],
318
+ "Date02_Sub02_backpack_twohand": [
319
+ "t0037.033"
320
+ ],
321
+ "Date02_Sub02_boxtiny_hand": [
322
+ "t0009.800",
323
+ "t0010.333",
324
+ "t0010.467",
325
+ "t0013.100",
326
+ "t0013.300"
327
+ ],
328
+ "Date02_Sub02_monitor_move2": [
329
+ "t0015.167"
330
+ ],
331
+ "Date02_Sub02_plasticcontainer": [
332
+ "t0022.300"
333
+ ],
334
+ "Date02_Sub02_toolbox": [
335
+ "t0010.433",
336
+ "t0033.000",
337
+ "t0040.467"
338
+ ],
339
+ "Date02_Sub02_toolbox_part2": [
340
+ "t0048.533"
341
+ ],
342
+ "Date02_Sub02_yogamat": [
343
+ "t0014.300"
344
+ ],
345
+ "Date03_Sub03_boxlong": [
346
+ "t0024.367"
347
+ ],
348
+ "Date03_Sub03_plasticcontainer": [
349
+ "t0045.633"
350
+ ],
351
+ "Date03_Sub04_boxsmall": [
352
+ "t0020.000",
353
+ "t0021.000",
354
+ "t0024.100",
355
+ "t0024.133",
356
+ "t0025.000",
357
+ "t0025.633",
358
+ "t0027.000"
359
+ ],
360
+ "Date03_Sub04_boxtiny": [
361
+ "t0005.967",
362
+ "t0006.967",
363
+ "t0007.833",
364
+ "t0019.000",
365
+ "t0020.400",
366
+ "t0020.467"
367
+ ],
368
+ "Date03_Sub04_boxtiny_part2": [
369
+ "t0021.133"
370
+ ],
371
+ "Date03_Sub04_plasticcontainer_lift": [
372
+ "t0023.233"
373
+ ],
374
+ "Date03_Sub04_toolbox": [
375
+ "t0032.000",
376
+ "t0034.600",
377
+ "t0035.467",
378
+ "t0035.633",
379
+ "t0036.167",
380
+ "t0036.200",
381
+ "t0036.967",
382
+ "t0041.533"
383
+ ],
384
+ "Date03_Sub05_boxsmall": [
385
+ "t0006.700",
386
+ "t0036.767"
387
+ ],
388
+ "Date03_Sub05_boxtiny": [
389
+ "t0006.433",
390
+ "t0008.100",
391
+ "t0009.167",
392
+ "t0010.200",
393
+ "t0010.433",
394
+ "t0011.800",
395
+ "t0011.933",
396
+ "t0013.400",
397
+ "t0038.000",
398
+ "t0040.000",
399
+ "t0042.433",
400
+ "t0045.067"
401
+ ],
402
+ "Date04_Sub05_boxlarge": [
403
+ "t0038.000",
404
+ "t0038.067",
405
+ "t0039.000"
406
+ ],
407
+ "Date04_Sub05_boxlong": [
408
+ "t0031.333"
409
+ ],
410
+ "Date04_Sub05_boxtiny": [
411
+ "t0030.800",
412
+ "t0031.000",
413
+ "t0036.233"
414
+ ],
415
+ "Date04_Sub05_stool": [
416
+ "t0020.000"
417
+ ],
418
+ "Date05_Sub06_boxlong": [
419
+ "t0038.167"
420
+ ],
421
+ "Date05_Sub06_boxtiny": [
422
+ "t0016.933",
423
+ "t0035.033",
424
+ "t0037.000",
425
+ "t0043.067",
426
+ "t0043.200",
427
+ "t0044.967",
428
+ "t0045.000",
429
+ "t0047.000",
430
+ "t0047.200"
431
+ ],
432
+ "Date05_Sub06_toolbox": [
433
+ "t0011.000",
434
+ "t0012.200",
435
+ "t0022.000"
436
+ ],
437
+ "Date06_Sub07_boxlong": [
438
+ "t0042.000"
439
+ ],
440
+ "Date06_Sub07_boxtiny": [
441
+ "t0004.633",
442
+ "t0004.800",
443
+ "t0004.867"
444
+ ],
445
+ "Date06_Sub07_stool_sit": [
446
+ "t0046.000",
447
+ "t0046.900"
448
+ ],
449
+ "Date06_Sub07_suitcase_lift": [
450
+ "t0006.933",
451
+ "t0007.000"
452
+ ],
453
+ "Date06_Sub07_toolbox": [
454
+ "t0007.733",
455
+ "t0008.000",
456
+ "t0012.000",
457
+ "t0013.000",
458
+ "t0040.000",
459
+ "t0041.000",
460
+ "t0044.000",
461
+ "t0047.000"
462
+ ],
463
+ "Date06_Sub07_yogamat": [
464
+ "t0028.000"
465
+ ],
466
+ "Date07_Sub04_boxlong": [
467
+ "t0012.767"
468
+ ],
469
+ "Date07_Sub04_boxsmall": [
470
+ "t0046.300"
471
+ ],
472
+ "Date07_Sub04_boxtiny": [
473
+ "t0008.667",
474
+ "t0009.333",
475
+ "t0017.367",
476
+ "t0038.600",
477
+ "t0040.000",
478
+ "t0040.067",
479
+ "t0040.900",
480
+ "t0042.000",
481
+ "t0042.100",
482
+ "t0042.133",
483
+ "t0042.400",
484
+ "t0042.800",
485
+ "t0042.900"
486
+ ],
487
+ "Date07_Sub04_plasticcontainer": [
488
+ "t0024.167",
489
+ "t0025.633",
490
+ "t0025.833"
491
+ ],
492
+ "Date07_Sub04_suitcase_lift": [
493
+ "t0024.000",
494
+ "t0025.000"
495
+ ],
496
+ "Date07_Sub05_suitcase_open": [
497
+ "t0031.000",
498
+ "t0032.000"
499
+ ],
500
+ "Date07_Sub05_tablesmall": [
501
+ "t0076.000",
502
+ "t0117.567"
503
+ ],
504
+ "Date07_Sub08_boxtiny": [
505
+ "t0017.933"
506
+ ],
507
+ "Date07_Sub08_yogamat": [
508
+ "t0005.867"
509
+ ]
510
+ }
511
+ }
data/preprocess/behave.py ADDED
File without changes
data/preprocess/behave_test/behave_simple_test.npz.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # load split.json and make an npz with all the folders in the test split
2
+ import argparse
3
+ import json
4
+ import os
5
+ import glob
6
+ import numpy as np
7
+
8
+ BEHAVE_PATH = '/ps/project/datasets/BEHAVE/sequences/'
9
+
10
if __name__ == '__main__':
    # Collect every colour frame of the BEHAVE test sequences listed in the
    # split file and store the image paths in a single .npz archive.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default=BEHAVE_PATH)
    parser.add_argument('--split_file', type=str, default='data/preprocess/behave_test/split.json')
    parser.add_argument('--out_file', type=str, default='data/dataset_extras/behave/behave_simple_test.npz')
    args = parser.parse_args()

    with open(args.split_file, 'r') as f:
        split = json.load(f)

    test_split = split['test']

    # structs we use
    imgnames_ = []

    for seq_name in test_split:
        print(seq_name)
        seq_dir = os.path.join(args.data_dir, seq_name)
        # get images recursively from the seq_dir folder, in sorted order
        images = sorted(glob.glob(os.path.join(seq_dir, '**/*color.jpg'), recursive=True))
        print(len(images))
        imgnames_.extend(images)

    # np.savez does not create missing parent directories, so make sure the
    # output folder exists (skipped when out_file is a bare file name).
    out_dir = os.path.dirname(args.out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    np.savez(args.out_file, imgname=imgnames_,)
    print('Saved to ', args.out_file)
37
+
data/preprocess/behave_test/split.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": [
3
+ "Date01_Sub01_backpack_back",
4
+ "Date01_Sub01_backpack_hand",
5
+ "Date01_Sub01_backpack_hug",
6
+ "Date01_Sub01_basketball",
7
+ "Date01_Sub01_boxlarge_hand",
8
+ "Date01_Sub01_boxlong_hand",
9
+ "Date01_Sub01_boxmedium_hand",
10
+ "Date01_Sub01_boxsmall_hand",
11
+ "Date01_Sub01_boxtiny_hand",
12
+ "Date01_Sub01_chairblack_hand",
13
+ "Date01_Sub01_chairblack_lift",
14
+ "Date01_Sub01_chairblack_sit",
15
+ "Date01_Sub01_chairwood_hand",
16
+ "Date01_Sub01_chairwood_lift",
17
+ "Date01_Sub01_chairwood_sit",
18
+ "Date01_Sub01_keyboard_move",
19
+ "Date01_Sub01_keyboard_typing",
20
+ "Date01_Sub01_monitor_hand",
21
+ "Date01_Sub01_monitor_move",
22
+ "Date01_Sub01_plasticcontainer",
23
+ "Date01_Sub01_stool_move",
24
+ "Date01_Sub01_stool_sit",
25
+ "Date01_Sub01_suitcase",
26
+ "Date01_Sub01_suitcase_lift",
27
+ "Date01_Sub01_tablesmall_lean",
28
+ "Date01_Sub01_tablesmall_lift",
29
+ "Date01_Sub01_tablesmall_move",
30
+ "Date01_Sub01_tablesquare_hand",
31
+ "Date01_Sub01_tablesquare_lift",
32
+ "Date01_Sub01_tablesquare_sit",
33
+ "Date01_Sub01_toolbox",
34
+ "Date01_Sub01_trashbin",
35
+ "Date01_Sub01_yogaball",
36
+ "Date01_Sub01_yogaball_play",
37
+ "Date01_Sub01_yogamat_hand",
38
+ "Date02_Sub02_backpack_back",
39
+ "Date02_Sub02_backpack_hand",
40
+ "Date02_Sub02_backpack_twohand",
41
+ "Date02_Sub02_basketball",
42
+ "Date02_Sub02_boxlarge_hand",
43
+ "Date02_Sub02_boxlong_hand",
44
+ "Date02_Sub02_boxmedium_hand",
45
+ "Date02_Sub02_boxsmall_hand",
46
+ "Date02_Sub02_boxtiny_hand",
47
+ "Date02_Sub02_chairblack_hand",
48
+ "Date02_Sub02_chairblack_lift",
49
+ "Date02_Sub02_chairblack_sit",
50
+ "Date02_Sub02_chairwood_hand",
51
+ "Date02_Sub02_chairwood_sit",
52
+ "Date02_Sub02_keyboard_move",
53
+ "Date02_Sub02_keyboard_typing",
54
+ "Date02_Sub02_monitor_hand",
55
+ "Date02_Sub02_monitor_move",
56
+ "Date02_Sub02_plasticcontainer",
57
+ "Date02_Sub02_stool_move",
58
+ "Date02_Sub02_stool_sit",
59
+ "Date02_Sub02_suitcase_ground",
60
+ "Date02_Sub02_suitcase_lift",
61
+ "Date02_Sub02_tablesmall_lean",
62
+ "Date02_Sub02_tablesmall_lift",
63
+ "Date02_Sub02_tablesmall_move",
64
+ "Date02_Sub02_tablesquare_lift",
65
+ "Date02_Sub02_tablesquare_move",
66
+ "Date02_Sub02_tablesquare_sit",
67
+ "Date02_Sub02_toolbox",
68
+ "Date02_Sub02_trashbin",
69
+ "Date02_Sub02_yogaball_play",
70
+ "Date02_Sub02_yogaball_sit",
71
+ "Date02_Sub02_yogamat",
72
+ "Date04_Sub05_backpack",
73
+ "Date04_Sub05_basketball",
74
+ "Date04_Sub05_boxlarge",
75
+ "Date04_Sub05_boxlong",
76
+ "Date04_Sub05_boxmedium",
77
+ "Date04_Sub05_boxsmall",
78
+ "Date04_Sub05_boxtiny",
79
+ "Date04_Sub05_chairblack",
80
+ "Date04_Sub05_chairwood",
81
+ "Date04_Sub05_keyboard",
82
+ "Date04_Sub05_monitor",
83
+ "Date04_Sub05_monitor_sit",
84
+ "Date04_Sub05_plasticcontainer",
85
+ "Date04_Sub05_stool",
86
+ "Date04_Sub05_suitcase",
87
+ "Date04_Sub05_suitcase_open",
88
+ "Date04_Sub05_tablesmall",
89
+ "Date04_Sub05_tablesquare",
90
+ "Date04_Sub05_toolbox",
91
+ "Date04_Sub05_trashbin",
92
+ "Date04_Sub05_yogaball",
93
+ "Date04_Sub05_yogamat",
94
+ "Date05_Sub05_backpack",
95
+ "Date05_Sub05_chairblack",
96
+ "Date05_Sub05_chairwood",
97
+ "Date05_Sub05_yogaball",
98
+ "Date05_Sub06_backpack_back",
99
+ "Date05_Sub06_backpack_hand",
100
+ "Date05_Sub06_backpack_twohand",
101
+ "Date05_Sub06_basketball",
102
+ "Date05_Sub06_boxlarge",
103
+ "Date05_Sub06_boxlong",
104
+ "Date05_Sub06_boxmedium",
105
+ "Date05_Sub06_boxsmall",
106
+ "Date05_Sub06_boxtiny",
107
+ "Date05_Sub06_chairblack_hand",
108
+ "Date05_Sub06_chairblack_lift",
109
+ "Date05_Sub06_chairblack_sit",
110
+ "Date05_Sub06_chairwood_hand",
111
+ "Date05_Sub06_chairwood_lift",
112
+ "Date05_Sub06_chairwood_sit",
113
+ "Date05_Sub06_keyboard_hand",
114
+ "Date05_Sub06_keyboard_move",
115
+ "Date05_Sub06_monitor_hand",
116
+ "Date05_Sub06_monitor_move",
117
+ "Date05_Sub06_plasticcontainer",
118
+ "Date05_Sub06_stool_lift",
119
+ "Date05_Sub06_stool_sit",
120
+ "Date05_Sub06_suitcase_hand",
121
+ "Date05_Sub06_suitcase_lift",
122
+ "Date05_Sub06_tablesmall_hand",
123
+ "Date05_Sub06_tablesmall_lean",
124
+ "Date05_Sub06_tablesmall_lift",
125
+ "Date05_Sub06_tablesquare_lift",
126
+ "Date05_Sub06_tablesquare_move",
127
+ "Date05_Sub06_tablesquare_sit",
128
+ "Date05_Sub06_toolbox",
129
+ "Date05_Sub06_trashbin",
130
+ "Date05_Sub06_yogaball_play",
131
+ "Date05_Sub06_yogaball_sit",
132
+ "Date05_Sub06_yogamat",
133
+ "Date06_Sub07_backpack_back",
134
+ "Date06_Sub07_backpack_hand",
135
+ "Date06_Sub07_backpack_twohand",
136
+ "Date06_Sub07_basketball",
137
+ "Date06_Sub07_boxlarge",
138
+ "Date06_Sub07_boxlong",
139
+ "Date06_Sub07_boxmedium",
140
+ "Date06_Sub07_boxsmall",
141
+ "Date06_Sub07_boxtiny",
142
+ "Date06_Sub07_chairblack_hand",
143
+ "Date06_Sub07_chairblack_lift",
144
+ "Date06_Sub07_chairblack_sit",
145
+ "Date06_Sub07_chairwood_hand",
146
+ "Date06_Sub07_chairwood_lift",
147
+ "Date06_Sub07_chairwood_sit",
148
+ "Date06_Sub07_keyboard_move",
149
+ "Date06_Sub07_keyboard_typing",
150
+ "Date06_Sub07_monitor_move",
151
+ "Date06_Sub07_plasticcontainer",
152
+ "Date06_Sub07_stool_lift",
153
+ "Date06_Sub07_stool_sit",
154
+ "Date06_Sub07_suitcase_lift",
155
+ "Date06_Sub07_suitcase_move",
156
+ "Date06_Sub07_tablesmall_lean",
157
+ "Date06_Sub07_tablesmall_lift",
158
+ "Date06_Sub07_tablesmall_move",
159
+ "Date06_Sub07_tablesquare_lift",
160
+ "Date06_Sub07_tablesquare_move",
161
+ "Date06_Sub07_tablesquare_sit",
162
+ "Date06_Sub07_toolbox",
163
+ "Date06_Sub07_trashbin",
164
+ "Date06_Sub07_yogaball_play",
165
+ "Date06_Sub07_yogaball_sit",
166
+ "Date06_Sub07_yogamat",
167
+ "Date07_Sub04_backpack_back",
168
+ "Date07_Sub04_backpack_hand",
169
+ "Date07_Sub04_backpack_twohand",
170
+ "Date07_Sub04_basketball",
171
+ "Date07_Sub04_boxlarge",
172
+ "Date07_Sub04_boxlong",
173
+ "Date07_Sub04_boxmedium",
174
+ "Date07_Sub04_boxsmall",
175
+ "Date07_Sub04_boxtiny",
176
+ "Date07_Sub04_chairblack_hand",
177
+ "Date07_Sub04_chairblack_lift",
178
+ "Date07_Sub04_chairblack_sit",
179
+ "Date07_Sub04_chairwood_hand",
180
+ "Date07_Sub04_chairwood_lift",
181
+ "Date07_Sub04_chairwood_sit",
182
+ "Date07_Sub04_keyboard_move",
183
+ "Date07_Sub04_keyboard_typing",
184
+ "Date07_Sub04_monitor_hand",
185
+ "Date07_Sub04_monitor_move",
186
+ "Date07_Sub04_plasticcontainer",
187
+ "Date07_Sub04_stool_lift",
188
+ "Date07_Sub04_stool_sit",
189
+ "Date07_Sub04_suitcase_lift",
190
+ "Date07_Sub04_suitcase_open",
191
+ "Date07_Sub04_tablesmall_lean",
192
+ "Date07_Sub04_tablesmall_lift",
193
+ "Date07_Sub04_tablesmall_move",
194
+ "Date07_Sub04_tablesquare_lift",
195
+ "Date07_Sub04_tablesquare_move",
196
+ "Date07_Sub04_tablesquare_sit",
197
+ "Date07_Sub04_toolbox_lift",
198
+ "Date07_Sub04_trashbin",
199
+ "Date07_Sub04_yogaball_play",
200
+ "Date07_Sub04_yogaball_sit",
201
+ "Date07_Sub04_yogamat",
202
+ "Date07_Sub05_suitcase_lift",
203
+ "Date07_Sub05_suitcase_open",
204
+ "Date07_Sub05_tablesmall",
205
+ "Date07_Sub05_tablesquare",
206
+ "Date07_Sub08_backpack_back",
207
+ "Date07_Sub08_backpack_hand",
208
+ "Date07_Sub08_backpack_hug",
209
+ "Date07_Sub08_basketball",
210
+ "Date07_Sub08_boxlarge",
211
+ "Date07_Sub08_boxlong",
212
+ "Date07_Sub08_boxmedium",
213
+ "Date07_Sub08_boxsmall",
214
+ "Date07_Sub08_boxtiny",
215
+ "Date07_Sub08_chairblack_hand",
216
+ "Date07_Sub08_chairblack_lift",
217
+ "Date07_Sub08_chairblack_sit",
218
+ "Date07_Sub08_chairwood_hand",
219
+ "Date07_Sub08_chairwood_lift",
220
+ "Date07_Sub08_chairwood_sit",
221
+ "Date07_Sub08_keyboard_move",
222
+ "Date07_Sub08_keyboard_typing",
223
+ "Date07_Sub08_monitor_hand",
224
+ "Date07_Sub08_monitor_move",
225
+ "Date07_Sub08_plasticcontainer",
226
+ "Date07_Sub08_stool",
227
+ "Date07_Sub08_suitcase",
228
+ "Date07_Sub08_tablesmall",
229
+ "Date07_Sub08_tablesquare",
230
+ "Date07_Sub08_toolbox",
231
+ "Date07_Sub08_trashbin",
232
+ "Date07_Sub08_yogaball",
233
+ "Date07_Sub08_yogamat"
234
+ ],
235
+ "test": [
236
+ "Date03_Sub03_backpack_back",
237
+ "Date03_Sub03_backpack_hand",
238
+ "Date03_Sub03_backpack_hug",
239
+ "Date03_Sub03_basketball",
240
+ "Date03_Sub03_boxlarge",
241
+ "Date03_Sub03_boxlong",
242
+ "Date03_Sub03_boxmedium",
243
+ "Date03_Sub03_boxsmall",
244
+ "Date03_Sub03_boxtiny",
245
+ "Date03_Sub03_chairblack_hand",
246
+ "Date03_Sub03_chairblack_lift",
247
+ "Date03_Sub03_chairblack_sit",
248
+ "Date03_Sub03_chairblack_sitstand",
249
+ "Date03_Sub03_chairwood_hand",
250
+ "Date03_Sub03_chairwood_lift",
251
+ "Date03_Sub03_chairwood_sit",
252
+ "Date03_Sub03_keyboard_move",
253
+ "Date03_Sub03_keyboard_typing",
254
+ "Date03_Sub03_monitor_move",
255
+ "Date03_Sub03_plasticcontainer",
256
+ "Date03_Sub03_stool_lift",
257
+ "Date03_Sub03_stool_sit",
258
+ "Date03_Sub03_suitcase_lift",
259
+ "Date03_Sub03_suitcase_move",
260
+ "Date03_Sub03_tablesmall_lean",
261
+ "Date03_Sub03_tablesmall_lift",
262
+ "Date03_Sub03_tablesmall_move",
263
+ "Date03_Sub03_tablesquare_lift",
264
+ "Date03_Sub03_tablesquare_move",
265
+ "Date03_Sub03_tablesquare_sit",
266
+ "Date03_Sub03_toolbox",
267
+ "Date03_Sub03_trashbin",
268
+ "Date03_Sub03_yogaball_play",
269
+ "Date03_Sub03_yogaball_sit",
270
+ "Date03_Sub03_yogamat",
271
+ "Date03_Sub04_backpack_back",
272
+ "Date03_Sub04_backpack_hand",
273
+ "Date03_Sub04_backpack_hug",
274
+ "Date03_Sub04_basketball",
275
+ "Date03_Sub04_boxlarge",
276
+ "Date03_Sub04_boxlong",
277
+ "Date03_Sub04_boxmedium",
278
+ "Date03_Sub04_boxsmall",
279
+ "Date03_Sub04_boxtiny",
280
+ "Date03_Sub04_chairblack_hand",
281
+ "Date03_Sub04_chairblack_liftreal",
282
+ "Date03_Sub04_chairblack_sit",
283
+ "Date03_Sub04_chairwood_hand",
284
+ "Date03_Sub04_chairwood_lift",
285
+ "Date03_Sub04_chairwood_sit",
286
+ "Date03_Sub04_keyboard_move",
287
+ "Date03_Sub04_keyboard_typing",
288
+ "Date03_Sub04_monitor_hand",
289
+ "Date03_Sub04_monitor_move",
290
+ "Date03_Sub04_plasticcontainer_lift",
291
+ "Date03_Sub04_stool_move",
292
+ "Date03_Sub04_stool_sit",
293
+ "Date03_Sub04_suitcase_ground",
294
+ "Date03_Sub04_suitcase_lift",
295
+ "Date03_Sub04_tablesmall_hand",
296
+ "Date03_Sub04_tablesmall_lean",
297
+ "Date03_Sub04_tablesmall_lift",
298
+ "Date03_Sub04_tablesquare_hand",
299
+ "Date03_Sub04_tablesquare_lift",
300
+ "Date03_Sub04_tablesquare_sit",
301
+ "Date03_Sub04_toolbox",
302
+ "Date03_Sub04_trashbin",
303
+ "Date03_Sub04_yogaball_play",
304
+ "Date03_Sub04_yogaball_sit",
305
+ "Date03_Sub04_yogamat",
306
+ "Date03_Sub05_backpack",
307
+ "Date03_Sub05_basketball",
308
+ "Date03_Sub05_boxlarge",
309
+ "Date03_Sub05_boxlong",
310
+ "Date03_Sub05_boxmedium",
311
+ "Date03_Sub05_boxsmall",
312
+ "Date03_Sub05_boxtiny",
313
+ "Date03_Sub05_chairblack",
314
+ "Date03_Sub05_chairwood",
315
+ "Date03_Sub05_keyboard",
316
+ "Date03_Sub05_monitor",
317
+ "Date03_Sub05_plasticcontainer",
318
+ "Date03_Sub05_stool",
319
+ "Date03_Sub05_suitcase",
320
+ "Date03_Sub05_tablesmall",
321
+ "Date03_Sub05_tablesquare",
322
+ "Date03_Sub05_toolbox",
323
+ "Date03_Sub05_trashbin",
324
+ "Date03_Sub05_yogaball",
325
+ "Date03_Sub05_yogamat"
326
+ ]
327
+ }
data/preprocess/hot_dca.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ import sys
6
+ import imagesize
7
+ import argparse
8
+ import torch
9
+ import pandas as pd
10
+ import json
11
+
12
+ import monai.metrics as metrics
13
+
14
# Directory holding the HOT split manifests (.odgt files).
_HOT_SPLIT_DIR = "/ps/scratch/ps_shared/ychen2/4shashank/split"
HOT_TRAIN_SPLIT = f"{_HOT_SPLIT_DIR}/hot_train.odgt"
HOT_VAL_SPLIT = f"{_HOT_SPLIT_DIR}/hot_validation.odgt"
HOT_TEST_SPLIT = f"{_HOT_SPLIT_DIR}/hot_test.odgt"
17
+
18
def metric(mask, pred, back=True):
    """Return the mean IoU between a predicted mask and a reference mask.

    Thin wrapper around ``monai.metrics.compute_meaniou`` that collapses
    the scores it returns into a single value with ``.mean()``.

    Args:
        mask: reference mask, forwarded as the second positional argument.
        pred: predicted mask, forwarded as the first positional argument.
        back: forwarded as the third positional argument (presumably
            MONAI's ``include_background`` -- confirm against the installed
            MONAI version).

    Returns:
        The mean of the values returned by ``compute_meaniou``.
    """
    # NOTE(review): the trailing False is presumably ``ignore_empty`` -- confirm.
    scores = metrics.compute_meaniou(pred, mask, back, False)
    return scores.mean()
22
+
23
+
24
def combine_hot_prox_split(split):
    """Load the HOT image records belonging to ``split``.

    The split manifests are ``.odgt`` files: one JSON object per line.

    Args:
        split: one of ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        list with one decoded JSON record per non-blank manifest line,
        in file order.

    Raises:
        ValueError: if ``split`` is not a known split name. (Previously an
            unknown name fell through every branch and surfaced as an
            ``UnboundLocalError`` on ``records``.)
    """
    if split == 'train':
        split_files = [HOT_TRAIN_SPLIT]
    elif split == 'val':
        split_files = [HOT_VAL_SPLIT]
    elif split == 'test':
        split_files = [HOT_TEST_SPLIT]
    else:
        raise ValueError(
            f"unknown split {split!r}; expected 'train', 'val' or 'test'"
        )

    records = []
    for split_file in split_files:
        with open(split_file, "r") as f:
            # Stream the file and skip blank lines (e.g. a trailing newline
            # at EOF) instead of letting json.loads fail on them.
            records.extend(json.loads(line) for line in f if line.strip())
    return records
41
+
42
def hot_extract(img_dataset_path, smpl_params_path, dca_csv_path, out_dir, split=None, vis_path=None, visualize=False, include_supporting=True):
    """Build the HOT + DCA dataset file for one split and save it as ``.npz``.

    For every image of the split that has a row in the DCA csv, the person
    (entry of the SMPL parameter file) whose part mask has the highest IoU
    with the image's 2D contact polygon mask is selected, and that person's
    SMPL parameters are stored together with the per-vertex 3D contact label
    built from the csv. Images without a DCA row, or without any person whose
    IoU exceeds the threshold, are skipped.

    Args:
        img_dataset_path: dataset root; images are read from
            ``<root>/images/training`` and masks from the matching
            ``annotations`` directory.
        smpl_params_path: ``.npz`` file with the keys ``imgname``,
            ``part_seg``, ``scene_seg``, ``pose``, ``shape``, ``global_t``
            and ``focal_l``.
        dca_csv_path: csv with (at least) ``imgnames`` and ``vertices``
            columns.
        out_dir: directory the output ``.npz`` is written to (created if
            missing).
        split: split name forwarded to ``combine_hot_prox_split``.
        vis_path: directory for debug images; only used when ``visualize``.
        visualize: when true, dump the input image and the binarized masks
            to ``vis_path``.
        include_supporting: when false, vertices stored under the
            ``'SUPPORTING'`` key of the csv annotation are left out.

    Returns:
        None. Writes ``hot_dca_supporting_<include_supporting>_<split>.npz``
        into ``out_dir`` and prints summary statistics.
    """
    n_vertices = 6890  # number of SMPL mesh vertices

    # structs we use
    imgnames_ = []
    poses_, shapes_, transls_ = [], [], []
    cams_k_ = []
    polygon_2d_contact_ = []
    contact_3d_labels_ = []
    scene_seg_, part_seg_ = [], []

    img_dir = os.path.join(img_dataset_path, 'images', 'training')
    annotations_dir = img_dir.replace('images', 'annotations')
    records = combine_hot_prox_split(split)

    # np.load on an .npz returns a lazy NpzFile that re-reads an array from
    # disk on every key access, so pull each array out once, outside the loops.
    smpl_params = np.load(smpl_params_path)
    smpl_imgnames = smpl_params['imgname']
    smpl_part_seg = smpl_params['part_seg']
    smpl_scene_seg = smpl_params['scene_seg']
    smpl_pose = smpl_params['pose']
    smpl_shape = smpl_params['shape']
    smpl_transl = smpl_params['global_t']
    smpl_focal_l = smpl_params['focal_l']

    # load dca csv
    dca_csv = pd.read_csv(dca_csv_path)

    iou_thresh = 0
    num_with_3d_contact = 0
    focal_length_accumulator = []

    if visualize:
        # cv2.imwrite does not create missing directories
        os.makedirs(vis_path, exist_ok=True)

    for record in tqdm(records, dynamic_ncols=True):
        imgname = os.path.basename(record['fpath_img'])
        img_path = os.path.join(img_dir, imgname)

        # save image in temp_images
        if visualize:
            img = cv2.imread(img_path)
            cv2.imwrite(os.path.join(vis_path, imgname), img)

        # image size comes from the split record, not from the image file
        img_w, img_h = record["width"], record["height"]

        # get mask anns
        polygon_2d_contact_path = os.path.join(annotations_dir, os.path.splitext(imgname)[0] + '.png')

        # Get 3D contact annotations from DCA mturk csv
        # (if no imgnames column, run scripts/datascripts/add_imgname_column_to_deco_csv.py)
        dca_row = dca_csv.loc[dca_csv['imgnames'] == imgname]
        if len(dca_row) == 0:
            continue
        num_with_3d_contact += 1

        # NOTE(security): eval() executes whatever the csv cell contains; only
        # run this on trusted annotation files. ast.literal_eval would be the
        # safer choice if the cell is always a plain Python literal.
        vertices = eval(dca_row['vertices'].values[0])
        contact_3d_list = vertices[os.path.join('hot/training/', imgname)]

        # Aggregate the vertex ids of all keys, dropping repeated values
        contact_3d_idx = set()
        for item in contact_3d_list:
            for k, v in item.items():
                if include_supporting or k != 'SUPPORTING':
                    contact_3d_idx.update(v)
        contact_3d_labels = np.zeros(n_vertices)
        contact_3d_labels[list(contact_3d_idx)] = 1.

        # find indices that match the imgname
        inds = np.where(smpl_imgnames == img_path)[0]
        if len(inds) == 0:
            continue

        # The GT polygon mask is the same for every candidate person:
        # load and binarize it once per image.
        polygon_2d_contact = cv2.imread(polygon_2d_contact_path)
        polygon_2d_contact = np.where(polygon_2d_contact > 0, 1, 0)
        if visualize:
            # cast to uint8: cv2.imwrite does not accept the int64 output of np.where
            cv2.imwrite(os.path.join(vis_path, os.path.basename(polygon_2d_contact_path)),
                        (polygon_2d_contact * 255).astype(np.uint8))
        polygon_2d_contact = torch.from_numpy(polygon_2d_contact)[None, :].permute(0, 3, 1, 2)

        select_inds = []
        ious = []
        for ind in inds:
            # load and binarize the part mask of this person
            part_path = smpl_part_seg[ind]
            part_mask = cv2.imread(part_path)
            part_mask = np.where(part_mask > 0, 1, 0)
            if visualize:
                cv2.imwrite(os.path.join(vis_path, os.path.basename(part_path)),
                            (part_mask * 255).astype(np.uint8))
            part_mask = torch.from_numpy(part_mask)[None, :].permute(0, 3, 1, 2)

            # compute iou with part mask and gt polygon mask
            iou = float(metric(polygon_2d_contact, part_mask))
            if iou > iou_thresh:
                ious.append(iou)
                select_inds.append(ind)

        if not select_inds:
            continue

        # we use the heuristic that the 3D contact labeled is for the person
        # with maximum iou with HOT contacts
        max_iou_ind = select_inds[int(np.argmax(ious))]

        part_path = smpl_part_seg[max_iou_ind]
        scene_path = smpl_scene_seg[max_iou_ind]

        # get smpl params
        pose = smpl_pose[max_iou_ind]
        shape = smpl_shape[max_iou_ind]
        transl = smpl_transl[max_iou_ind]
        focal_length = smpl_focal_l[max_iou_ind]
        camC = np.array([[img_w // 2, img_h // 2]])

        # camera intrinsics: shared focal length, principal point at the image centre
        K = np.eye(3, dtype=np.float64)
        K[0, 0] = focal_length
        K[1, 1] = focal_length
        K[:2, 2:] = camC.T

        # store data
        imgnames_.append(img_path)
        polygon_2d_contact_.append(polygon_2d_contact_path)
        contact_3d_labels_.append(contact_3d_labels)
        scene_seg_.append(scene_path)
        part_seg_.append(part_path)
        poses_.append(pose.squeeze())
        transls_.append(transl.squeeze())
        shapes_.append(shape.squeeze())
        cams_k_.append(K.tolist())
        focal_length_accumulator.append(focal_length)

    print('Average focal length: ', np.mean(focal_length_accumulator))
    print('Median focal length: ', np.median(focal_length_accumulator))
    print('Std Dev focal length: ', np.std(focal_length_accumulator))

    # store the data struct
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir, f'hot_dca_supporting_{str(include_supporting)}_{split}.npz')
    np.savez(out_file, imgname=imgnames_,
             pose=poses_,
             transl=transls_,
             shape=shapes_,
             cam_k=cams_k_,
             polygon_2d_contact=polygon_2d_contact_,
             contact_label=contact_3d_labels_,
             scene_seg=scene_seg_,
             part_seg=part_seg_
             )
    print(f'Total number of rows: {len(imgnames_)}')
    print('Saved to ', out_file)
    print(f'Number of images with 3D contact labels: {num_with_3d_contact}')
199
+
200
if __name__ == '__main__':
    # Command-line entry point: every option maps 1:1 onto a keyword
    # argument of hot_extract, so the parsed namespace is forwarded as-is.
    cli = argparse.ArgumentParser()
    cli.add_argument('--img_dataset_path', type=str, default='/ps/project/datasets/HOT/Contact_Data/')
    cli.add_argument('--smpl_params_path', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/hot.npz')
    cli.add_argument('--dca_csv_path', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/dca.csv')
    cli.add_argument('--out_dir', type=str, default='/is/cluster/work/stripathi/pycharm_remote/dca_contact/data/dataset_extras')
    cli.add_argument('--vis_path', type=str, default='/is/cluster/work/stripathi/pycharm_remote/dca_contact/temp_images')
    cli.add_argument('--visualize', action='store_true', default=False)
    cli.add_argument('--include_supporting', action='store_true', default=False)
    cli.add_argument('--split', type=str, default='train')
    options = cli.parse_args()

    hot_extract(**vars(options))
220
+
data/preprocess/hot_noprox.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ import sys
6
+ import imagesize
7
+ import argparse
8
+ import torch
9
+ import pandas as pd
10
+ import json
11
+
12
+ import monai.metrics as metrics
13
+
14
# Directory holding the HOT split manifests (.odgt files).
_HOT_SPLIT_DIR = "/ps/scratch/ps_shared/ychen2/4shashank/split"
HOT_TRAIN_SPLIT = f"{_HOT_SPLIT_DIR}/hot_train.odgt"
HOT_VAL_SPLIT = f"{_HOT_SPLIT_DIR}/hot_validation.odgt"
HOT_TEST_SPLIT = f"{_HOT_SPLIT_DIR}/hot_test.odgt"
17
+
18
def metric(mask, pred, back=True):
    """Return the mean IoU between a predicted mask and a reference mask.

    Thin wrapper around ``monai.metrics.compute_meaniou`` that collapses
    the scores it returns into a single value with ``.mean()``.

    Args:
        mask: reference mask, forwarded as the second positional argument.
        pred: predicted mask, forwarded as the first positional argument.
        back: forwarded as the third positional argument (presumably
            MONAI's ``include_background`` -- confirm against the installed
            MONAI version).

    Returns:
        The mean of the values returned by ``compute_meaniou``.
    """
    # NOTE(review): the trailing False is presumably ``ignore_empty`` -- confirm.
    scores = metrics.compute_meaniou(pred, mask, back, False)
    return scores.mean()
22
+
23
+
24
def combine_hot_prox_split(split):
    """Load the HOT image records belonging to ``split``.

    The split manifests are ``.odgt`` files: one JSON object per line.

    Args:
        split: one of ``'train'``, ``'val'``, ``'test'`` or ``'trainval'``
            (the train records followed by the validation records).

    Returns:
        list with one decoded JSON record per non-blank manifest line,
        in file order.

    Raises:
        ValueError: if ``split`` is not a known split name. (Previously an
            unknown name fell through every branch and surfaced as an
            ``UnboundLocalError`` on ``records``.)
    """
    if split == 'train':
        split_files = [HOT_TRAIN_SPLIT]
    elif split == 'val':
        split_files = [HOT_VAL_SPLIT]
    elif split == 'test':
        split_files = [HOT_TEST_SPLIT]
    elif split == 'trainval':
        split_files = [HOT_TRAIN_SPLIT, HOT_VAL_SPLIT]
    else:
        raise ValueError(
            f"unknown split {split!r}; expected 'train', 'val', 'test' or 'trainval'"
        )

    records = []
    for split_file in split_files:
        with open(split_file, "r") as f:
            # Stream the file and skip blank lines (e.g. a trailing newline
            # at EOF) instead of letting json.loads fail on them.
            records.extend(json.loads(line) for line in f if line.strip())
    return records
51
+
52
def hot_extract(img_dataset_path, smpl_params_path, dca_csv_path, out_dir, split=None, vis_path=None, visualize=False, record_idx=None, include_supporting=True):
    """Build the HOT (no PROX) dataset file for one split and save it as ``.npz``.

    For every image of the split, each person (entry of the SMPL parameter
    file) whose part mask has an IoU above the threshold with the image's 2D
    contact polygon mask is stored. The per-vertex 3D contact label built
    from the DCA csv is attached only to the person with the highest IoU;
    all other rows (and all rows of images without a DCA annotation) get an
    empty label ``[]``.

    Args:
        img_dataset_path: dataset root; images are read from
            ``<root>/images/training`` and masks from the matching
            ``annotations`` directory.
        smpl_params_path: ``.npz`` file with the keys ``imgname``,
            ``part_seg``, ``scene_seg``, ``pose``, ``shape``, ``global_t``
            and ``focal_l``.
        dca_csv_path: csv with (at least) ``imgnames`` and ``vertices``
            columns.
        out_dir: directory the output ``.npz`` is written to (created if
            missing).
        split: split name forwarded to ``combine_hot_prox_split``.
        vis_path: directory for debug images; only used when ``visualize``.
        visualize: when true, dump the input image and the binarized masks
            to ``vis_path``.
        record_idx: when not None, the records are cut into 4 chunks with
            ``np.array_split`` and only chunk ``record_idx`` is processed.
        include_supporting: when false, vertices stored under the
            ``'SUPPORTING'`` key of the csv annotation are left out.

    Returns:
        None. Writes
        ``hot_noprox_supporting_<include_supporting>_<split>_<record_idx|combined>.npz``
        into ``out_dir`` and prints summary statistics.
    """
    n_vertices = 6890  # number of SMPL mesh vertices

    # structs we use
    imgnames_ = []
    poses_, shapes_, transls_ = [], [], []
    cams_k_ = []
    polygon_2d_contact_ = []
    contact_3d_labels_ = []
    scene_seg_, part_seg_ = [], []

    img_dir = os.path.join(img_dataset_path, 'images', 'training')
    annotations_dir = img_dir.replace('images', 'annotations')
    records = combine_hot_prox_split(split)
    # split records list into 4 sublists
    if record_idx is not None:
        records = np.array_split(records, 4)[record_idx]

    # np.load on an .npz returns a lazy NpzFile that re-reads an array from
    # disk on every key access, so pull each array out once, outside the loops.
    smpl_params = np.load(smpl_params_path)
    smpl_imgnames = smpl_params['imgname']
    smpl_part_seg = smpl_params['part_seg']
    smpl_scene_seg = smpl_params['scene_seg']
    smpl_pose = smpl_params['pose']
    smpl_shape = smpl_params['shape']
    smpl_transl = smpl_params['global_t']
    smpl_focal_l = smpl_params['focal_l']

    # load dca csv
    dca_csv = pd.read_csv(dca_csv_path)

    iou_thresh = 0
    num_with_3d_contact = 0
    focal_length_accumulator = []

    if visualize:
        # cv2.imwrite does not create missing directories
        os.makedirs(vis_path, exist_ok=True)

    for record in tqdm(records, dynamic_ncols=True):
        imgname = os.path.basename(record['fpath_img'])
        img_path = os.path.join(img_dir, imgname)

        # save image in temp_images
        if visualize:
            img = cv2.imread(img_path)
            cv2.imwrite(os.path.join(vis_path, imgname), img)

        # image size comes from the split record, not from the image file
        img_w, img_h = record["width"], record["height"]

        # get mask anns
        polygon_2d_contact_path = os.path.join(annotations_dir, os.path.splitext(imgname)[0] + '.png')

        # Get 3D contact annotations from DCA mturk csv
        # (if no imgnames column, run scripts/datascripts/add_imgname_column_to_deco_csv.py)
        dca_row = dca_csv.loc[dca_csv['imgnames'] == imgname]
        if len(dca_row) == 0:
            # no 3D annotation for this image: keep the rows, with an empty label
            contact_3d_labels = []
        else:
            num_with_3d_contact += 1
            # NOTE(security): eval() executes whatever the csv cell contains;
            # only run this on trusted annotation files. ast.literal_eval would
            # be the safer choice if the cell is always a plain Python literal.
            vertices = eval(dca_row['vertices'].values[0])
            contact_3d_list = vertices[os.path.join('hot/training/', imgname)]
            # Aggregate the vertex ids of all keys, dropping repeated values
            contact_3d_idx = set()
            for item in contact_3d_list:
                for k, v in item.items():
                    if include_supporting or k != 'SUPPORTING':
                        contact_3d_idx.update(v)
            contact_3d_labels = np.zeros(n_vertices)
            contact_3d_labels[list(contact_3d_idx)] = 1.

        # find indices that match the imgname
        inds = np.where(smpl_imgnames == img_path)[0]
        if len(inds) == 0:
            continue

        # The GT polygon mask is the same for every candidate person:
        # load and binarize it once per image.
        polygon_2d_contact = cv2.imread(polygon_2d_contact_path)
        polygon_2d_contact = np.where(polygon_2d_contact > 0, 1, 0)
        if visualize:
            # cast to uint8: cv2.imwrite does not accept the int64 output of np.where
            cv2.imwrite(os.path.join(vis_path, os.path.basename(polygon_2d_contact_path)),
                        (polygon_2d_contact * 255).astype(np.uint8))
        polygon_2d_contact = torch.from_numpy(polygon_2d_contact)[None, :].permute(0, 3, 1, 2)

        select_inds = []
        ious = []
        for ind in inds:
            # load and binarize the part mask of this person
            part_path = smpl_part_seg[ind]
            part_mask = cv2.imread(part_path)
            part_mask = np.where(part_mask > 0, 1, 0)
            if visualize:
                cv2.imwrite(os.path.join(vis_path, os.path.basename(part_path)),
                            (part_mask * 255).astype(np.uint8))
            part_mask = torch.from_numpy(part_mask)[None, :].permute(0, 3, 1, 2)

            # compute iou with part mask and gt polygon mask
            iou = float(metric(polygon_2d_contact, part_mask))
            if iou > iou_thresh:
                ious.append(iou)
                select_inds.append(ind)

        if not select_inds:
            continue

        # get select_ind with maximum iou
        max_iou_ind = select_inds[int(np.argmax(ious))]

        for ind in select_inds:
            part_path = smpl_part_seg[ind]
            scene_path = smpl_scene_seg[ind]

            # get smpl params
            pose = smpl_pose[ind]
            shape = smpl_shape[ind]
            transl = smpl_transl[ind]
            focal_length = smpl_focal_l[ind]
            camC = np.array([[img_w // 2, img_h // 2]])

            # camera intrinsics: shared focal length, principal point at the image centre
            K = np.eye(3, dtype=np.float64)
            K[0, 0] = focal_length
            K[1, 1] = focal_length
            K[:2, 2:] = camC.T

            # store data
            imgnames_.append(img_path)
            polygon_2d_contact_.append(polygon_2d_contact_path)
            # we use the heuristic that the 3D contact labeled is for the
            # person with maximum iou with HOT contacts
            if ind == max_iou_ind:
                contact_3d_labels_.append(contact_3d_labels)
            else:
                contact_3d_labels_.append([])
            scene_seg_.append(scene_path)
            part_seg_.append(part_path)
            poses_.append(pose.squeeze())
            transls_.append(transl.squeeze())
            shapes_.append(shape.squeeze())
            cams_k_.append(K.tolist())
            focal_length_accumulator.append(focal_length)

    print('Average focal length: ', np.mean(focal_length_accumulator))
    print('Median focal length: ', np.median(focal_length_accumulator))
    print('Std Dev focal length: ', np.std(focal_length_accumulator))

    # The label list mixes (6890,) arrays with empty lists. NumPy >= 1.24
    # refuses to build an array from such ragged input implicitly, so fall
    # back to an explicit 1-D object array (what older NumPy produced).
    try:
        contact_label_arr = np.asarray(contact_3d_labels_)
    except ValueError:
        contact_label_arr = np.empty(len(contact_3d_labels_), dtype=object)
        for row, label in enumerate(contact_3d_labels_):
            contact_label_arr[row] = label

    # store the data struct
    os.makedirs(out_dir, exist_ok=True)
    if record_idx is not None:
        out_file = os.path.join(out_dir, f'hot_noprox_supporting_{str(include_supporting)}_{split}_{record_idx}.npz')
    else:
        out_file = os.path.join(out_dir, f'hot_noprox_supporting_{str(include_supporting)}_{split}_combined.npz')
    np.savez(out_file, imgname=imgnames_,
             pose=poses_,
             transl=transls_,
             shape=shapes_,
             cam_k=cams_k_,
             polygon_2d_contact=polygon_2d_contact_,
             contact_label=contact_label_arr,
             scene_seg=scene_seg_,
             part_seg=part_seg_
             )
    print(f'Total number of rows: {len(imgnames_)}')
    print('Saved to ', out_file)
    print(f'Number of images with 3D contact labels: {num_with_3d_contact}')
218
+
219
if __name__ == '__main__':
    # Command-line entry point: every option maps 1:1 onto a keyword
    # argument of hot_extract, so the parsed namespace is forwarded as-is.
    cli = argparse.ArgumentParser()
    cli.add_argument('--img_dataset_path', type=str, default='/ps/project/datasets/HOT/Contact_Data/')
    cli.add_argument('--smpl_params_path', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/hot.npz')
    cli.add_argument('--dca_csv_path', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/dca.csv')
    cli.add_argument('--out_dir', type=str, default='/is/cluster/work/stripathi/pycharm_remote/dca_contact/data/dataset_extras')
    cli.add_argument('--vis_path', type=str, default='/is/cluster/work/stripathi/pycharm_remote/dca_contact/temp_images')
    cli.add_argument('--visualize', action='store_true', default=False)
    cli.add_argument('--include_supporting', action='store_true', default=False)
    cli.add_argument('--record_idx', type=int, default=None)
    cli.add_argument('--split', type=str, default='train')
    options = cli.parse_args()

    hot_extract(**vars(options))
241
+
data/preprocess/hot_prox.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ import sys
6
+ import imagesize
7
+ import argparse
8
+ import torch
9
+ import pandas as pd
10
+ import json
11
+
12
+ import monai.metrics as metrics
13
+
14
# Directory holding the PROX split manifests (.odgt files).
_PROX_SPLIT_DIR = "/ps/scratch/ps_shared/ychen2/4shashank/split"
PROX_TRAIN_SPLIT = f"{_PROX_SPLIT_DIR}/prox_train.odgt"
PROX_VAL_SPLIT = f"{_PROX_SPLIT_DIR}/prox_validation.odgt"
PROX_TEST_SPLIT = f"{_PROX_SPLIT_DIR}/prox_test.odgt"
17
+
18
def metric(mask, pred, back=True):
    """Return the mean IoU between a predicted mask and a reference mask.

    Thin wrapper around ``monai.metrics.compute_meaniou`` that collapses
    the scores it returns into a single value with ``.mean()``.

    Args:
        mask: reference mask, forwarded as the second positional argument.
        pred: predicted mask, forwarded as the first positional argument.
        back: forwarded as the third positional argument (presumably
            MONAI's ``include_background`` -- confirm against the installed
            MONAI version).

    Returns:
        The mean of the values returned by ``compute_meaniou``.
    """
    # NOTE(review): the trailing False is presumably ``ignore_empty`` -- confirm.
    scores = metrics.compute_meaniou(pred, mask, back, False)
    return scores.mean()
22
+
23
+
24
def combine_hot_prox_split(split):
    """Load the PROX image records belonging to ``split``.

    The split manifests are ``.odgt`` files: one JSON object per line.

    Args:
        split: one of ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        list with one decoded JSON record per non-blank manifest line,
        in file order.

    Raises:
        ValueError: if ``split`` is not a known split name. (Previously an
            unknown name fell through every branch and surfaced as an
            ``UnboundLocalError`` on ``records``.)
    """
    if split == 'train':
        split_files = [PROX_TRAIN_SPLIT]
    elif split == 'val':
        split_files = [PROX_VAL_SPLIT]
    elif split == 'test':
        split_files = [PROX_TEST_SPLIT]
    else:
        raise ValueError(
            f"unknown split {split!r}; expected 'train', 'val' or 'test'"
        )

    records = []
    for split_file in split_files:
        with open(split_file, "r") as f:
            # Stream the file and skip blank lines (e.g. a trailing newline
            # at EOF) instead of letting json.loads fail on them.
            records.extend(json.loads(line) for line in f if line.strip())
    return records
41
+
42
def hot_extract(img_dataset_path, smpl_params_path, dca_csv_path, out_dir, split=None, vis_path=None, visualize=False, downsample_factor=4):
    """Build the HOT-PROX dataset file for one split and save it as ``.npz``.

    For every image of the split, each person (entry of the SMPL parameter
    file) whose part mask has an IoU above the threshold with the image's 2D
    contact polygon mask is stored. The per-vertex 3D contact label built
    from the DCA csv is attached only to the person with the highest IoU;
    all other rows (and all rows of images without a DCA annotation) get an
    empty label ``[]``.

    Args:
        img_dataset_path: dataset root; images are read from
            ``<root>/images/training`` and masks from the matching
            ``annotations`` directory.
        smpl_params_path: ``.npz`` file with the keys ``imgname``,
            ``part_seg``, ``scene_seg``, ``pose``, ``shape``, ``global_t``
            and ``focal_l``.
        dca_csv_path: csv with (at least) ``imgnames`` and ``vertices``
            columns.
        out_dir: directory the output ``.npz`` is written to (created if
            missing).
        split: split name forwarded to ``combine_hot_prox_split``.
        vis_path: directory for debug images; only used when ``visualize``.
        visualize: when true, dump the input image and the binarized masks
            to ``vis_path``.
        downsample_factor: accepted for interface compatibility; not used
            by this function.

    Returns:
        None. Writes ``hot_prox_<split>.npz`` into ``out_dir`` and prints
        summary statistics.
    """
    n_vertices = 6890  # number of SMPL mesh vertices

    # structs we use
    imgnames_ = []
    poses_, shapes_, transls_ = [], [], []
    cams_k_ = []
    polygon_2d_contact_ = []
    contact_3d_labels_ = []
    scene_seg_, part_seg_ = [], []

    img_dir = os.path.join(img_dataset_path, 'images', 'training')
    annotations_dir = img_dir.replace('images', 'annotations')
    records = combine_hot_prox_split(split)

    # np.load on an .npz returns a lazy NpzFile that re-reads an array from
    # disk on every key access, so pull each array out once, outside the loops.
    smpl_params = np.load(smpl_params_path)
    smpl_imgnames = smpl_params['imgname']
    smpl_part_seg = smpl_params['part_seg']
    smpl_scene_seg = smpl_params['scene_seg']
    smpl_pose = smpl_params['pose']
    smpl_shape = smpl_params['shape']
    smpl_transl = smpl_params['global_t']
    smpl_focal_l = smpl_params['focal_l']

    # load dca csv
    dca_csv = pd.read_csv(dca_csv_path)

    iou_thresh = 0
    num_with_3d_contact = 0
    focal_length_accumulator = []

    if visualize:
        # cv2.imwrite does not create missing directories
        os.makedirs(vis_path, exist_ok=True)

    for record in tqdm(records, dynamic_ncols=True):
        imgname = os.path.basename(record['fpath_img'])
        img_path = os.path.join(img_dir, imgname)

        # save image in temp_images
        if visualize:
            img = cv2.imread(img_path)
            cv2.imwrite(os.path.join(vis_path, imgname), img)

        # image size comes from the split record, not from the image file
        img_w, img_h = record["width"], record["height"]

        # get mask anns
        polygon_2d_contact_path = os.path.join(annotations_dir, os.path.splitext(imgname)[0] + '.png')

        # Get 3D contact annotations from DCA mturk csv
        # (if no imgnames column, run scripts/datascripts/add_imgname_column_to_deco_csv.py)
        dca_row = dca_csv.loc[dca_csv['imgnames'] == imgname]
        if len(dca_row) == 0:
            # no 3D annotation for this image: keep the rows, with an empty label
            contact_3d_labels = []
        else:
            num_with_3d_contact += 1
            # NOTE(security): eval() executes whatever the csv cell contains;
            # only run this on trusted annotation files. ast.literal_eval would
            # be the safer choice if the cell is always a plain Python literal.
            vertices = eval(dca_row['vertices'].values[0])
            contact_3d_list = vertices[os.path.join('hot/training/', imgname)]
            # Aggregate the vertex ids of all keys, dropping repeated values
            contact_3d_idx = set()
            for item in contact_3d_list:
                for v in item.values():
                    contact_3d_idx.update(v)
            contact_3d_labels = np.zeros(n_vertices)
            contact_3d_labels[list(contact_3d_idx)] = 1.

        # find indices that match the imgname
        inds = np.where(smpl_imgnames == img_path)[0]
        if len(inds) == 0:
            continue

        # The GT polygon mask is the same for every candidate person:
        # load and binarize it once per image.
        polygon_2d_contact = cv2.imread(polygon_2d_contact_path)
        polygon_2d_contact = np.where(polygon_2d_contact > 0, 1, 0)
        if visualize:
            # cast to uint8: cv2.imwrite does not accept the int64 output of np.where
            cv2.imwrite(os.path.join(vis_path, os.path.basename(polygon_2d_contact_path)),
                        (polygon_2d_contact * 255).astype(np.uint8))
        polygon_2d_contact = torch.from_numpy(polygon_2d_contact)[None, :].permute(0, 3, 1, 2)

        select_inds = []
        ious = []
        for ind in inds:
            # load and binarize the part mask of this person
            part_path = smpl_part_seg[ind]
            part_mask = cv2.imread(part_path)
            part_mask = np.where(part_mask > 0, 1, 0)
            if visualize:
                cv2.imwrite(os.path.join(vis_path, os.path.basename(part_path)),
                            (part_mask * 255).astype(np.uint8))
            part_mask = torch.from_numpy(part_mask)[None, :].permute(0, 3, 1, 2)

            # compute iou with part mask and gt polygon mask
            iou = float(metric(polygon_2d_contact, part_mask))
            if iou > iou_thresh:
                ious.append(iou)
                select_inds.append(ind)

        if not select_inds:
            continue

        # get select_ind with maximum iou
        max_iou_ind = select_inds[int(np.argmax(ious))]

        for ind in select_inds:
            part_path = smpl_part_seg[ind]
            scene_path = smpl_scene_seg[ind]

            # get smpl params
            pose = smpl_pose[ind]
            shape = smpl_shape[ind]
            transl = smpl_transl[ind]
            focal_length = smpl_focal_l[ind]
            camC = np.array([[img_w // 2, img_h // 2]])

            # camera intrinsics: shared focal length, principal point at the image centre
            K = np.eye(3, dtype=np.float64)
            K[0, 0] = focal_length
            K[1, 1] = focal_length
            K[:2, 2:] = camC.T

            # store data
            imgnames_.append(img_path)
            polygon_2d_contact_.append(polygon_2d_contact_path)
            # we use the heuristic that the 3D contact labeled is for the
            # person with maximum iou with HOT contacts
            if ind == max_iou_ind:
                contact_3d_labels_.append(contact_3d_labels)
            else:
                contact_3d_labels_.append([])
            scene_seg_.append(scene_path)
            part_seg_.append(part_path)
            poses_.append(pose.squeeze())
            transls_.append(transl.squeeze())
            shapes_.append(shape.squeeze())
            cams_k_.append(K.tolist())
            focal_length_accumulator.append(focal_length)

    print('Average focal length: ', np.mean(focal_length_accumulator))
    print('Median focal length: ', np.median(focal_length_accumulator))
    print('Std Dev focal length: ', np.std(focal_length_accumulator))

    # The label list mixes (6890,) arrays with empty lists. NumPy >= 1.24
    # refuses to build an array from such ragged input implicitly, so fall
    # back to an explicit 1-D object array (what older NumPy produced).
    try:
        contact_label_arr = np.asarray(contact_3d_labels_)
    except ValueError:
        contact_label_arr = np.empty(len(contact_3d_labels_), dtype=object)
        for row, label in enumerate(contact_3d_labels_):
            contact_label_arr[row] = label

    # store the data struct
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir, f'hot_prox_{split}.npz')
    np.savez(out_file, imgname=imgnames_,
             pose=poses_,
             transl=transls_,
             shape=shapes_,
             cam_k=cams_k_,
             polygon_2d_contact=polygon_2d_contact_,
             contact_label=contact_label_arr,
             scene_seg=scene_seg_,
             part_seg=part_seg_
             )
    print(f'Total number of rows: {len(imgnames_)}')
    print('Saved to ', out_file)
    print(f'Number of images with 3D contact labels: {num_with_3d_contact}')
198
+
199
if __name__ == '__main__':
    # Command-line entry point: every option maps 1:1 onto a keyword
    # argument of hot_extract, so the parsed namespace is forwarded as-is.
    cli = argparse.ArgumentParser()
    cli.add_argument('--img_dataset_path', type=str, default='/ps/project/datasets/HOT/Contact_Data/')
    cli.add_argument('--smpl_params_path', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/hot.npz')
    cli.add_argument('--dca_csv_path', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/dca.csv')
    cli.add_argument('--out_dir', type=str, default='/is/cluster/work/stripathi/pycharm_remote/dca_contact/data/dataset_extras')
    cli.add_argument('--vis_path', type=str, default='/is/cluster/work/stripathi/pycharm_remote/dca_contact/temp_images')
    cli.add_argument('--visualize', action='store_true', default=False)
    cli.add_argument('--split', type=str, default='train')
    options = cli.parse_args()

    hot_extract(**vars(options))
217
+
data/preprocess/prepare_damon_behave_split.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path as osp
2
+ import os
3
+ import shutil
4
+ import json
5
+ import argparse
6
+ import numpy as np
7
+ from PIL import Image
8
+ from tqdm import tqdm
9
+
10
# Object name -> label value used for that object in the segmentation masks.
# NOTE(review): the values look like 0-based COCO category indices -- confirm.
objects = dict(
    backpack=24,
    chair=56,
    keyboard=66,
    suitcase=28,
)
16
+
17
def copy_images_to_behave_format(in_img_dir, in_image_list, in_part_dir, in_seg_dir, out_dir):
    """Copy images and their masks into a BEHAVE-style directory layout.

    For every object name in the image-list JSON and every image listed for
    it, a directory ``<out_dir>/<object>/<img_name>/`` is created holding:

    * ``k1.color.jpg``       -- copy of the input image,
    * ``k1.person_mask.png`` -- part mask with every non-zero pixel set to 255,
    * ``k1.object_rend.png`` -- segmentation mask with the pixels of the
      object's label set to 255 and everything else set to 0.

    Images for which the image, part mask or segmentation mask is missing
    are reported on stdout and skipped.

    :param in_img_dir: input directory containing images
    :param in_image_list: JSON file mapping object name -> list of image names
    :param in_part_dir: directory with the part masks (``<stem>_0.png``)
    :param in_seg_dir: directory with the segmentation masks (``<stem>.png``)
    :param out_dir: output directory to copy images to
    :return: None
    """
    # read image list
    with open(in_image_list, 'r') as fp:
        img_list_dict = json.load(fp)

    for object_name, img_names in img_list_dict.items():
        out_dir_object = osp.join(out_dir, object_name)
        os.makedirs(out_dir_object, exist_ok=True)
        # copy images to out_dir
        for img_name in tqdm(img_names, dynamic_ncols=True):
            # Derive mask names from the stem so the extension is replaced
            # only at the end of the name and regardless of its spelling.
            stem = osp.splitext(img_name)[0]
            input_image_path = osp.join(in_img_dir, img_name)
            input_part_path = osp.join(in_part_dir, stem + '_0.png')
            input_seg_path = osp.join(in_seg_dir, stem + '.png')
            if not (osp.exists(input_part_path) and osp.exists(input_image_path) and osp.exists(input_seg_path)):
                print(f'{input_image_path} or {input_part_path} or {input_seg_path} does not exist')
                continue
            out_dir_image = osp.join(out_dir_object, img_name)
            os.makedirs(out_dir_image, exist_ok=True)
            shutil.copy(input_image_path, osp.join(out_dir_image, 'k1.color.jpg'))

            # person mask: convert all non-zero pixels to 255
            with Image.open(input_part_path) as part_img:
                person_mask = np.array(part_img)
            person_mask[person_mask > 0] = 255
            Image.fromarray(person_mask).save(osp.join(out_dir_image, 'k1.person_mask.png'))

            # object mask: object pixels -> 255, everything else -> 0.
            # Built with np.where so that pixels that already hold the value
            # 255 in the segmentation map are not mistaken for the object.
            with Image.open(input_seg_path) as seg_img:
                seg_mask = np.array(seg_img)
            object_num = objects[object_name]
            object_mask = np.where(seg_mask == object_num, 255, 0).astype(seg_mask.dtype)
            Image.fromarray(object_mask).save(osp.join(out_dir_image, 'k1.object_rend.png'))
60
+
61
if __name__ == '__main__':
    # Command-line entry point: option names match the parameters of
    # copy_images_to_behave_format, so they are passed through by keyword.
    cli = argparse.ArgumentParser()
    cli.add_argument('--in_img_dir', type=str, default='/ps/project/datasets/HOT/Contact_Data/images/training')
    cli.add_argument('--in_part_dir', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot/parts/training')
    cli.add_argument('--in_seg_dir', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot_behave_split/agniv/masks')
    cli.add_argument('--in_image_list', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot_behave_split/imgnames_per_object_dict.json')
    cli.add_argument('--out_dir', type=str, default='/ps/scratch/ps_shared/stripathi/deco/4agniv/hot_behave_split/training')
    options = cli.parse_args()
    copy_images_to_behave_format(
        in_img_dir=options.in_img_dir,
        in_image_list=options.in_image_list,
        in_part_dir=options.in_part_dir,
        in_seg_dir=options.in_seg_dir,
        out_dir=options.out_dir,
    )
data/preprocess/rich_smplx.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["CDF_LIB"] = "/is/cluster/scratch/stripathi/data/cdf37_1-dist/src/lib"
3
+
4
+ import cv2
5
+ import pandas as pd
6
+ import json
7
+ import glob
8
+ import h5py
9
+ import torch
10
+ import trimesh
11
+ import numpy as np
12
+ import pickle as pkl
13
+ from xml.dom import minidom
14
+ import xml.etree.ElementTree as ET
15
+ from tqdm import tqdm
16
+ from spacepy import pycdf
17
+ # from .read_openpose import read_openpose
18
+ import sys
19
+ sys.path.append('../../')
20
+ from models import hmr, SMPL
21
+ import config
22
+ import constants
23
+
24
+ import shutil
25
+
26
+ import smplx
27
+ import pytorch3d.transforms as p3dt
28
+
29
+ from utils.geometry import batch_rodrigues, batch_rot2aa, ea2rm
30
+
31
+
32
# Module-level SMPL-X body model shared by the extraction code below.
# NOTE: this is built at import time and moved to the GPU, so importing the
# module requires the model files and a CUDA device.
model_type = 'smplx'
model_folder = '/ps/project/common/smplifyx/models/'
body_model_params = {
    'model_path': model_folder,
    'model_type': model_type,
    'create_global_orient': True,
    'create_body_pose': True,
    'create_betas': True,
    'num_betas': 10,
    'create_left_hand_pose': True,
    'create_right_hand_pose': True,
    'create_expression': True,
    'create_jaw_pose': True,
    'create_leye_pose': True,
    'create_reye_pose': True,
    'create_transl': True,
    'use_pca': False,
}
body_model = smplx.create(gender='neutral', **body_model_params).to('cuda')
49
+
50
def rich_extract(img_dataset_path, out_path, split=None, vis_path=None, visualize=False, downsample_factor=4):
    """Gather per-frame RICH annotations for one split and save them as an ``.npz``.

    Every file name in ``<img_dataset_path>/images/<split>`` is parsed into a
    sequence id, camera id and frame number. For each frame the contact
    labels, the SMPL-X parameters and the camera calibration are loaded, and
    the body pose is rotated into the camera frame. Frames whose calibration
    cannot be read are skipped with a message.

    Args:
        img_dataset_path: Root folder holding ``images/``, ``labels/``,
            ``segmentation_masks/`` and ``parts/``, each with a ``<split>``
            sub-folder.
        out_path: Directory receiving ``rich_<split>_smplx.npz``; created if
            it does not exist.
        split: Name of the split sub-folder. Despite the ``None`` default it
            is required, because it is used to build every path.
        vis_path: Unused; kept so existing callers keep working.
        visualize: Unused; kept so existing callers keep working.
        downsample_factor: Divisor applied to the focal lengths and the
            principal point before they are stored in ``cam_k``.

    The saved archive contains the keys ``imgname``, ``pose``, ``transl``,
    ``shape``, ``cam_k``, ``contact_label``, ``scene_seg`` and ``part_seg``.
    """
    # Root of the RICH release (calibration files and SMPL-X fits).
    dataset_path = '/ps/project/datasets/RICH'

    # structs we use
    imgnames_ = []
    poses_, shapes_, transls_ = [], [], []
    cams_k_ = []
    contact_label_ = []
    scene_seg_, part_seg_ = [], []

    frame_files = sorted(os.listdir(os.path.join(img_dataset_path, 'images', split)))
    for fl in tqdm(frame_files, dynamic_ncols=True):
        # The slicing below presumably expects names of the form
        # '<frame_id>_cam_XX_<img_num>_<cam>.<3-letter ext>' -- TODO confirm.
        ind = fl.index('cam')
        frame_id = fl[:ind - 1]
        cam_dir = fl[ind:ind + 6]
        img = fl[ind + 7:-3] + 'jpeg'
        imgname = os.path.join(frame_id, cam_dir, img)

        # Contact labels live in '<frame_id>/<img_num>/<second name token>.pkl'.
        name_tokens = fl.split('_')
        cam_tag = fl[ind - 1:ind + 7]
        seq_and_frame = fl.replace(cam_tag, '/')[:-7].split('/')
        mask_name = seq_and_frame[0] + '/' + seq_and_frame[1] + '/' + name_tokens[1] + '.pkl'
        mask_path = os.path.join(img_dataset_path, 'labels', split, mask_name)
        mask = pd.read_pickle(mask_path)['contact']

        scene_path = os.path.join(img_dataset_path, 'segmentation_masks', split, fl[:-3] + 'png')
        part_path = os.path.join(img_dataset_path, 'parts', split, fl[:-3] + 'png')

        location = frame_id.split('_')[0]
        img_num = name_tokens[-2]
        cam_num = int(name_tokens[-1][:2])

        # Camera calibration. A missing or unreadable file skips the frame.
        # `Exception` (not a bare except) so Ctrl-C still aborts the run.
        camera_fn = os.path.join(dataset_path, 'rich_toolkit/data/scan_calibration', location, f'calibration/{cam_num:03d}.xml')
        try:
            focal_length_x, focal_length_y, camC, camR, _, _, _, _ = extract_cam_param_xml(camera_fn)
        except Exception:
            print(f'camera calibration file not found: {camera_fn}')
            continue

        # path to smpl params
        smplx_param = os.path.join(dataset_path, 'rich_toolkit/data/bodies', split, frame_id, str(img_num), frame_id.split('_')[1] + '.pkl')

        # get smpl parameters
        ## body resides in multi-ioi coordinate, where camera 0 is world zero.
        with open(smplx_param, 'rb') as f:
            body_params = pkl.load(f)
        beta = body_params['betas']
        pose_rotmat = p3dt.axis_angle_to_matrix(torch.FloatTensor(body_params['body_pose'].reshape(-1, 3))).numpy()
        transl = body_params['transl']
        global_orient = p3dt.axis_angle_to_matrix(torch.FloatTensor(body_params['global_orient'].reshape(-1, 3))).numpy()

        # The canonical-body forward pass and the multicam2world transform
        # that used to be computed here were never stored or read, so they
        # are no longer evaluated for every frame.

        # get SMPL params in camera coordinates (only the root orientation changes)
        global_orient_cam = np.matmul(np.array(camR), global_orient)
        full_pose_rotmat_cam = np.concatenate((global_orient_cam, pose_rotmat), axis=0).squeeze()
        theta_cam = batch_rot2aa(torch.FloatTensor(full_pose_rotmat_cam)).reshape(-1, 66).cpu().numpy()

        # camera intrinsics, rescaled for the downsampled images
        K = np.eye(3, dtype=np.float64)
        K[0, 0] = focal_length_x / downsample_factor
        K[1, 1] = focal_length_y / downsample_factor
        K[:2, 2:] = camC.T / downsample_factor

        # store data
        imgnames_.append(os.path.join('/ps/project/datasets/RICH_JPG', split, imgname))
        contact_label_.append(mask)
        scene_seg_.append(scene_path)
        part_seg_.append(part_path)
        poses_.append(theta_cam.squeeze())
        # NOTE(review): `transl` is stored as loaded (multi-ioi frame) while the
        # pose is expressed in the camera frame -- confirm this is intended.
        transls_.append(transl.squeeze())
        shapes_.append(beta.squeeze())
        cams_k_.append(K.tolist())

    # store the data struct
    os.makedirs(out_path, exist_ok=True)
    out_file = os.path.join(out_path, f'rich_{split}_smplx.npz')
    np.savez(out_file, imgname=imgnames_,
             pose=poses_,
             transl=transls_,
             shape=shapes_,
             cam_k=cams_k_,
             contact_label=contact_label_,
             scene_seg=scene_seg_,
             part_seg=part_seg_
             )
    print('Saved to ', out_file)
179
+
180
def rectify_pose(camera_r, body_aa):
    """Apply ``camera_r`` on the left of the rotation(s) encoded by ``body_aa``.

    ``body_aa`` is converted with ``batch_rodrigues`` and reshaped to
    ``(-1, 3, 3)``, left-multiplied by ``camera_r``, and converted back with
    ``batch_rot2aa``. Presumably both helpers convert between axis-angle and
    rotation matrices -- see ``utils.geometry`` to confirm.
    """
    rotmats = batch_rodrigues(body_aa).reshape(-1, 3, 3)
    return batch_rot2aa(camera_r @ rotmats)
185
+
186
+
187
def extract_cam_param_xml(xml_path: str = '', dtype=float):
    """Read camera parameters from a calibration XML file.

    The file must contain whitespace-separated numbers under
    ``CameraMatrix/data`` (read as a row-major 3x4 ``[R | t]``),
    ``Intrinsics/data`` (read as a row-major 3x3 matrix) and
    ``Distortion/data`` (only the first two values are used).

    Args:
        xml_path: Path of the XML file to parse.
        dtype: dtype of every returned numpy array.

    Returns:
        ``(focal_length_x, focal_length_y, center, rotation, translation,
        cam_center, k1, k2)``. The focal lengths are Python floats; ``center``
        is ``(1, 2)``, ``rotation`` is ``(3, 3)``, ``translation`` and
        ``cam_center`` are ``(1, 3)``, ``k1`` and ``k2`` are ``(1,)``.
    """
    import xml.etree.ElementTree as ET

    doc = ET.parse(xml_path)

    def read_floats(node_path):
        # All numbers stored in the text of the node at `node_path`.
        return [float(token) for token in doc.find(node_path).text.split()]

    ext = read_floats('./CameraMatrix/data')
    intr = read_floats('./Intrinsics/data')
    dist = read_floats('./Distortion/data')

    def ext_at(row, col):
        # Entry (row, col) of the row-major 3x4 extrinsic matrix.
        return ext[4 * row + col]

    focal_length_x, focal_length_y = intr[0], intr[4]
    center = np.array([[intr[2], intr[5]]], dtype=dtype)

    rotation = np.array([[ext_at(r, c) for c in range(3)] for r in range(3)], dtype=dtype)
    translation = np.array([[ext_at(r, 3) for r in range(3)]], dtype=dtype)

    # t = -Rc --> c = -R^Tt
    cam_center = np.array(
        [[-ext_at(0, c) * ext_at(0, 3) - ext_at(1, c) * ext_at(1, 3) - ext_at(2, c) * ext_at(2, 3)
          for c in range(3)]],
        dtype=dtype)

    k1 = np.array([dist[0]], dtype=dtype)
    k2 = np.array([dist[1]], dtype=dtype)

    return focal_length_x, focal_length_y, center, rotation, translation, cam_center, k1, k2
219
+
220
# Run the extraction for every split. This executes whenever the module is
# run or imported.
for _split in ('train', 'val', 'test'):
    rich_extract(
        img_dataset_path='/is/cluster/work/achatterjee/rich',
        out_path='/is/cluster/work/achatterjee/rich/npzs',
        split=_split,
    )
data/preprocess/rich_smplx_agniv.py ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["CDF_LIB"] = "/is/cluster/scratch/stripathi/data/cdf37_1-dist/src/lib"
3
+
4
+ import cv2
5
+ import pandas as pd
6
+ # import json
7
+ # import glob
8
+ # import h5py
9
+ import torch
10
+ # import trimesh
11
+ import numpy as np
12
+ import pickle as pkl
13
+ # from xml.dom import minidom
14
+ # import xml.etree.ElementTree as ET
15
+ from tqdm import tqdm
16
+ from spacepy import pycdf
17
+ # from .read_openpose import read_openpose
18
+ import sys
19
+ sys.path.append('../../')
20
+ # from models import hmr, SMPL
21
+ # import config
22
+ # import constants
23
+ import argparse
24
+
25
+ # import shutil
26
+
27
+ import smplx
28
+ import pytorch3d.transforms as p3dt
29
+
30
+ from utils.geometry import batch_rodrigues, batch_rot2aa, ea2rm
31
+ # from vis_utils.world_vis import overlay_mesh, vis_smpl_with_ground
32
+
33
+
34
# Module-level SMPL-X body model shared by the extraction code below.
# NOTE: this is built at import time and moved to the GPU, so importing the
# module requires the model files and a CUDA device.
model_type = 'smplx'
model_folder = '/ps/project/common/smplifyx/models/'
body_model_params = {
    'model_path': model_folder,
    'model_type': model_type,
    'create_global_orient': True,
    'create_body_pose': True,
    'create_betas': True,
    'num_betas': 10,
    'create_left_hand_pose': True,
    'create_right_hand_pose': True,
    'create_expression': True,
    'create_jaw_pose': True,
    'create_leye_pose': True,
    'create_reye_pose': True,
    'create_transl': True,
    'use_pca': False,
}
body_model = smplx.create(gender='neutral', **body_model_params).to('cuda')
51
+
52
def rich_extract(img_dataset_path, out_path, scene_indx, split=None, vis_path=None, visualize=False, downsample_factor=1):
    """Gather the RICH annotations of one scene and save them as an ``.npz``.

    Frames of ``<img_dataset_path>/images/<split>`` whose file name contains
    the selected scene name are processed. For each one the contact labels,
    SMPL-X parameters and camera calibration are loaded, and pose and
    translation are expressed in the camera frame. Frames whose calibration
    cannot be read are skipped with a message. Nothing is done if the output
    file already exists.

    Args:
        img_dataset_path: Root folder holding ``images/<split>`` and
            ``labels/<split>``.
        out_path: Directory receiving ``rich_<scene>_smplx.npz``; created if
            it does not exist.
        scene_indx: Index into the sorted scene folders of the RICH release
            for ``split``. ``None`` selects the scene
            ``'ParkingLot2_009_impro5'``, which was previously hard-coded
            (the index used to be ignored).
        split: Name of the split sub-folder. Despite the ``None`` default it
            is required, because it is used to build every path.
        vis_path: Unused; kept so existing callers keep working.
        visualize: Unused; kept so existing callers keep working.
        downsample_factor: Divisor applied to the focal lengths and the
            principal point before they are stored in ``cam_k``.

    The saved archive contains the keys ``imgname``, ``pose``, ``transl``,
    ``shape``, ``cam_k``, ``contact_label``, ``scene_seg`` and ``part_seg``.
    """
    # Root of the RICH release (images, calibration files and SMPL-X fits).
    dataset_path = '/ps/project/datasets/RICH'

    # structs we use
    imgnames_ = []
    poses_, shapes_, transls_ = [], [], []
    cams_k_ = []
    contact_label_ = []
    scene_seg_, part_seg_ = [], []

    # Select the scene: honour the requested index, fall back to the former
    # hard-coded scene when no index is given.
    if scene_indx is None:
        scene_name = 'ParkingLot2_009_impro5'
    else:
        scenes = sorted(os.listdir(os.path.join(dataset_path, 'rich_toolkit/data/images', split)))
        scene_name = scenes[scene_indx]

    out_file = os.path.join(out_path, f'rich_{scene_name}_smplx.npz')
    if os.path.exists(out_file):
        return
    print(scene_name)

    frame_files = sorted(os.listdir(os.path.join(img_dataset_path, 'images', split)))
    for fl in tqdm(frame_files, dynamic_ncols=True):
        if scene_name not in fl:
            continue

        # The slicing below presumably expects names of the form
        # '<frame_id>_cam_XX_<img_num>_<cam>.<3-letter ext>' -- TODO confirm.
        ind = fl.index('cam')
        frame_id = fl[:ind - 1]
        cam_dir = fl[ind:ind + 6]
        img = fl[ind + 7:-3] + 'jpeg'
        imgname = os.path.join(frame_id, cam_dir, img)

        # Contact labels live in '<frame_id>/<img_num>/<second name token>.pkl'.
        name_tokens = fl.split('_')
        cam_tag = fl[ind - 1:ind + 7]
        seq_and_frame = fl.replace(cam_tag, '/')[:-7].split('/')
        mask_name = seq_and_frame[0] + '/' + seq_and_frame[1] + '/' + name_tokens[1] + '.pkl'
        mask_path = os.path.join(img_dataset_path, 'labels', split, mask_name)
        mask = pd.read_pickle(mask_path)['contact']

        scene_path = os.path.join('/ps/scratch/ps_shared/stripathi/deco/4agniv/rich/seg_masks_new', split, fl[:-3] + 'png')
        part_path = os.path.join('/ps/scratch/ps_shared/stripathi/deco/4agniv/rich/part_masks_new', split, fl[:-3] + 'png')

        location = frame_id.split('_')[0]
        img_num = name_tokens[-2]
        cam_num = int(name_tokens[-1][:2])

        # Camera calibration. A missing or unreadable file skips the frame.
        # `Exception` (not a bare except) so Ctrl-C still aborts the run.
        camera_fn = os.path.join(dataset_path, 'rich_toolkit/data/scan_calibration', location, f'calibration/{cam_num:03d}.xml')
        try:
            focal_length_x, focal_length_y, camC, camR, camT, _, _, _ = extract_cam_param_xml(camera_fn)
        except Exception:
            print(f'camera calibration file not found: {camera_fn}')
            continue

        # path to smpl params
        smplx_param = os.path.join(dataset_path, 'rich_toolkit/data/bodies', split, frame_id, str(img_num), frame_id.split('_')[1] + '.pkl')

        # get smpl parameters
        ## body resides in multi-ioi coordinate, where camera 0 is world zero.
        with open(smplx_param, 'rb') as f:
            body_params = pkl.load(f)
        beta = body_params['betas']
        pose_rotmat = p3dt.axis_angle_to_matrix(torch.FloatTensor(body_params['body_pose'].reshape(-1, 3))).numpy()
        transl = body_params['transl']
        global_orient = p3dt.axis_angle_to_matrix(torch.FloatTensor(body_params['global_orient'].reshape(-1, 3))).numpy()

        # canonical body with shape: its first joint is used below as the
        # pivot when moving the translation into the camera frame
        smpl_body_cam0 = body_model(betas=torch.FloatTensor(beta).to('cuda'))
        pelvis_cam0 = smpl_body_cam0.joints.detach().cpu().numpy()[:, 0, :]

        # get SMPL params in camera coordinates
        global_orient_cam = np.matmul(np.array(camR), global_orient)
        transl_cam = np.matmul(camR, (pelvis_cam0 + transl).T).T + camT - pelvis_cam0
        full_pose_rotmat_cam = np.concatenate((global_orient_cam, pose_rotmat), axis=0).squeeze()
        theta_cam = batch_rot2aa(torch.FloatTensor(full_pose_rotmat_cam)).reshape(-1, 66).cpu().numpy()

        # camera intrinsics, rescaled for the downsampled images
        K = np.eye(3, dtype=np.float64)
        K[0, 0] = focal_length_x / downsample_factor
        K[1, 1] = focal_length_y / downsample_factor
        K[:2, 2:] = camC.T / downsample_factor

        # Store the path of the original image (.bmp, or .png when no .bmp
        # exists) instead of the .jpeg copy.
        jpg_img_path = os.path.join('/ps/project/datasets/RICH_JPG', split, imgname)
        bmp_img_path = jpg_img_path.replace('/ps/project/datasets/RICH_JPG', '/ps/project/datasets/RICH/rich_toolkit/data/images')
        bmp_img_path = bmp_img_path.replace('.jpeg', '.bmp')
        if not os.path.exists(bmp_img_path):
            bmp_img_path = bmp_img_path.replace('.bmp', '.png')

        # store data
        imgnames_.append(bmp_img_path)
        contact_label_.append(mask)
        scene_seg_.append(scene_path)
        part_seg_.append(part_path)
        poses_.append(theta_cam.squeeze())
        transls_.append(transl_cam.squeeze())
        shapes_.append(beta.squeeze())
        cams_k_.append(K.tolist())

    # store the data struct
    os.makedirs(out_path, exist_ok=True)
    np.savez(out_file, imgname=imgnames_,
             pose=poses_,
             transl=transls_,
             shape=shapes_,
             cam_k=cams_k_,
             contact_label=contact_label_,
             scene_seg=scene_seg_,
             part_seg=part_seg_
             )
    print('Saved to ', out_file)
527
+
528
def rectify_pose(camera_r, body_aa):
    """Apply ``camera_r`` on the left of the rotation(s) encoded by ``body_aa``.

    ``body_aa`` is converted with ``batch_rodrigues`` and reshaped to
    ``(-1, 3, 3)``, left-multiplied by ``camera_r``, and converted back with
    ``batch_rot2aa``. Presumably both helpers convert between axis-angle and
    rotation matrices -- see ``utils.geometry`` to confirm.
    """
    rotmats = batch_rodrigues(body_aa).reshape(-1, 3, 3)
    return batch_rot2aa(camera_r @ rotmats)
533
+
534
+
535
def extract_cam_param_xml(xml_path: str = '', dtype=float):
    """Read camera parameters from a calibration XML file.

    The file must contain whitespace-separated numbers under
    ``CameraMatrix/data`` (read as a row-major 3x4 ``[R | t]``),
    ``Intrinsics/data`` (read as a row-major 3x3 matrix) and
    ``Distortion/data`` (only the first two values are used).

    Args:
        xml_path: Path of the XML file to parse.
        dtype: dtype of every returned numpy array.

    Returns:
        ``(focal_length_x, focal_length_y, center, rotation, translation,
        cam_center, k1, k2)``. The focal lengths are Python floats; ``center``
        is ``(1, 2)``, ``rotation`` is ``(3, 3)``, ``translation`` and
        ``cam_center`` are ``(1, 3)``, ``k1`` and ``k2`` are ``(1,)``.
    """
    import xml.etree.ElementTree as ET

    doc = ET.parse(xml_path)

    def read_floats(node_path):
        # All numbers stored in the text of the node at `node_path`.
        return [float(token) for token in doc.find(node_path).text.split()]

    ext = read_floats('./CameraMatrix/data')
    intr = read_floats('./Intrinsics/data')
    dist = read_floats('./Distortion/data')

    def ext_at(row, col):
        # Entry (row, col) of the row-major 3x4 extrinsic matrix.
        return ext[4 * row + col]

    focal_length_x, focal_length_y = intr[0], intr[4]
    center = np.array([[intr[2], intr[5]]], dtype=dtype)

    rotation = np.array([[ext_at(r, c) for c in range(3)] for r in range(3)], dtype=dtype)
    translation = np.array([[ext_at(r, 3) for r in range(3)]], dtype=dtype)

    # t = -Rc --> c = -R^Tt
    cam_center = np.array(
        [[-ext_at(0, c) * ext_at(0, 3) - ext_at(1, c) * ext_at(1, 3) - ext_at(2, c) * ext_at(2, 3)
          for c in range(3)]],
        dtype=dtype)

    k1 = np.array([dist[0]], dtype=dtype)
    k2 = np.array([dist[1]], dtype=dtype)

    return focal_length_x, focal_length_y, center, rotation, translation, cam_center, k1, k2
567
+
568
+ # rich_extract(img_dataset_path='/is/cluster/work/achatterjee/rich', out_path='/is/cluster/work/achatterjee/rich/npzs', split='train')
569
+ # rich_extract(img_dataset_path='/is/cluster/work/achatterjee/rich', out_path='/is/cluster/fast/achatterjee/rich/scene_npzs', split='val')
570
+ # rich_extract(img_dataset_path='/is/cluster/work/achatterjee/rich', out_path='/is/cluster/fast/achatterjee/rich/scene_npzs/test', split='test')
571
+ # rich_extract(dataset_path='/ps/scratch/ps_shared/stripathi/4yogi/RICH/val/', out_path='/home/achatterjee/rich_ext', split='val')
572
+
573
if __name__=='__main__':
    # Command-line entry point: `--index` is forwarded as `scene_indx`, and
    # the test split is always processed.
    cli = argparse.ArgumentParser()
    cli.add_argument('--index', type=int)
    args = cli.parse_args()

    rich_extract(
        img_dataset_path='/is/cluster/work/achatterjee/rich',
        out_path='/is/cluster/fast/achatterjee/rich/scene_npzs/test',
        scene_indx=args.index,
        split='test',
    )
data/preprocess/yoga-82_test/yoga82_simple_test.npz.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # build an npz listing a random sample of images from every folder of the Yoga-82 image directory
2
+ import argparse
3
+ import json
4
+ import os
5
+ import glob
6
+ import numpy as np
7
+
8
Yoga_82_PATH = '/is/cluster/work/stripathi/pycharm_remote/yogi/data/Yoga-82/yoga_dataset_images'
# Default number of images sampled per folder (matches the "_20each" output name).
NUM_IMAGES_PER_FOLDER = 20


def sample_images(images, num_samples, rng=np.random):
    """Pick up to ``num_samples`` distinct image paths at random.

    Args:
        images: sequence of image paths found in one folder.
        num_samples: maximum number of paths to return.
        rng: object exposing ``choice`` (the ``numpy.random`` module or a
            ``numpy.random.RandomState``); pass a seeded instance for a
            reproducible selection.

    Returns:
        A list of plain ``str`` paths. When the folder holds ``num_samples``
        images or fewer, all of them are returned instead of raising the
        ``ValueError`` that sampling without replacement would trigger.
    """
    images = list(images)
    if len(images) <= num_samples:
        return images
    return [str(path) for path in rng.choice(images, num_samples, replace=False)]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default=Yoga_82_PATH)
    parser.add_argument('--out_file', type=str, default='data/dataset_extras/yoga-82/yoga-82_simple_test_20each.npz')
    parser.add_argument('--num_per_folder', type=int, default=NUM_IMAGES_PER_FOLDER,
                        help='maximum number of images sampled from each folder')
    parser.add_argument('--seed', type=int, default=None,
                        help='optional seed that makes the random sample reproducible')
    args = parser.parse_args()

    # RandomState(None) seeds itself from the OS, i.e. the unseeded default
    # behaves like the global numpy generator.
    rng = np.random.RandomState(args.seed)

    # structs we use
    imgnames_ = []

    # get all folder names in the data_dir
    folders = sorted(glob.glob(os.path.join(args.data_dir, '*')))
    for folder in folders:
        print(folder)
        # get all jpg images directly inside the folder
        images = glob.glob(os.path.join(folder, '*.jpg'))
        print(len(images))
        # only take a random subset of the images from each folder
        imgnames_.extend(sample_images(images, args.num_per_folder, rng))

    # make sure the destination directory exists before writing
    out_dir = os.path.dirname(args.out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    np.savez(args.out_file, imgname=imgnames_,)
    print('Saved to ', args.out_file)
34
+
example_images/213.jpg ADDED
example_images/pexels-photo-15732209.jpeg ADDED
example_images/pexels-photo-207569.webp ADDED
example_images/pexels-photo-3622517.webp ADDED
fetch_data.sh ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Script that fetches checkpoints and other necessary data for inference.
# Every step is independent: a failed download does not abort the later ones.

# fetch_and_extract URL ARCHIVE [DEST]
#   Downloads URL with wget (the file is saved under the server-provided name,
#   expected to be ARCHIVE), unpacks it into DEST (default: current directory)
#   and removes the archive afterwards. DEST is created when missing, so the
#   script can be re-run without "File exists" errors.
fetch_and_extract() {
    local url="$1"
    local archive="$2"
    local dest="${3:-.}"

    mkdir -p "$dest"
    # The URL is quoted because it contains '?', which the shell would
    # otherwise treat as a glob character.
    wget "$url" --max-redirect=2 --trust-server-names \
        && tar -xvf "$archive" --directory "$dest" \
        && rm "$archive"
}

# Download utility files and other constants
fetch_and_extract "https://keeper.mpdl.mpg.de/f/50cf65320b824391854b/?dl=1" data.tar.gz

# Downloading existing checkpoint
fetch_and_extract "https://keeper.mpdl.mpg.de/f/6f2e2258558f46ceb269/?dl=1" Release_Checkpoint.tar.gz checkpoints

# Downloading other checkpoint
fetch_and_extract "https://keeper.mpdl.mpg.de/f/9cb970221b1e45d185b8/?dl=1" Other_Checkpoints.tar.gz checkpoints

# Downloading training datasets
fetch_and_extract "https://keeper.mpdl.mpg.de/f/81c3ec9997dd440b8db3/?dl=1" Release_Datasets.tar.gz datasets
18
+
hot_analysis/.ipynb_checkpoints/hico_analysis-checkpoint.ipynb ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import json\n",
10
+ "import pandas as pd \n",
11
+ "import ipdb\n",
12
+ "import os\n",
13
+ "import pickle as pkl\n",
14
+ "import os.path as osp\n",
15
+ "import numpy as np\n",
16
+ "from PIL import Image\n"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 6,
22
+ "metadata": {},
23
+ "outputs": [
24
+ {
25
+ "name": "stdout",
26
+ "output_type": "stream",
27
+ "text": [
28
+ "Total images found: 9642\n",
29
+ "Images after KP filtering: 3895\n",
30
+ "['hake_train2015_HICO_train2015_00005476.jpg', 'hake_train2015_HICO_train2015_00008329.jpg', 'hake_train2015_HICO_train2015_00008027.jpg', 'hake_train2015_HICO_train2015_00013408.jpg', 'hake_train2015_HICO_train2015_00010656.jpg']\n"
31
+ ]
32
+ }
33
+ ],
34
+ "source": [
35
+ "# Load Agniv VITpose-base hico filtering\n",
36
+ "filter_path = './agniv_pose_filter/hico.npy'\n",
37
+ "pose_md = np.load(filter_path, allow_pickle=True)\n",
38
+ "pose_md = pose_md.item()\n",
39
+ "filter_img_names = {}\n",
40
+ "\n",
41
+ "print(f'Total images found: {len(pose_md)}')\n",
42
+ "\n",
43
+ "# Filter out images with < 10 visible keypoints \n",
44
+ "kp_thresh = 10\n",
45
+ "\n",
46
+ "for imgname, pose_num in pose_md.items():\n",
47
+ " if pose_num > kp_thresh:\n",
48
+ " filter_img_names[imgname] = pose_num\n",
49
+ " \n",
50
+ "print(f'Images after KP filtering: {len(filter_img_names)}')\n",
51
+ "\n",
52
+ "print(list(filter_img_names.keys())[:5])\n"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 7,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "# # Load Agniv VITpose-base hot dict\n",
62
+ "# filter_path = \"./agniv_pose_filter/hot_dict.pkl\"\n",
63
+ "# with open(filter_path, 'rb') as f:\n",
64
+ "# pose_md_dict = pkl.load(f)\n",
65
+ " \n",
66
+ "# hico_dict = {}\n",
67
+ "\n",
68
+ "# for k, v in pose_md_dict.items():\n",
69
+ "# if 'hake' in k:\n",
70
+ "# hico_dict[k] = v\n",
71
+ " \n",
72
+ "# print(f'Total images found: {len(hico_dict)}')\n",
73
+ "\n",
74
+ "# # Filter out images with < 10 visible keypoints \n",
75
+ "# kp_thresh = 10\n",
76
+ "\n",
77
+ "# filter_img_names = {}\n",
78
+ "\n",
79
+ "# for imgname, kp_md in hico_dict.items():\n",
80
+ "# if kp_md == 0:\n",
81
+ "# continue\n",
82
+ "# if kp_md[\"num_persons\"] == 1 and kp_md[\"num_kpt\"][0.5][0] > kp_thresh:\n",
83
+ "# filter_img_names[imgname] = kp_md[\"num_kpt\"][0.5][0]\n",
84
+ " \n",
85
+ "# print(f'Images after KP filtering: {len(filter_img_names)}')"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 8,
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ " 0 1 2\n",
98
+ "0 1 airplane board\n",
99
+ "1 2 airplane direct\n",
100
+ "2 3 airplane exit\n",
101
+ "3 4 airplane fly\n",
102
+ "4 5 airplane inspect\n",
103
+ ".. ... ... ...\n",
104
+ "595 596 zebra feed\n",
105
+ "596 597 zebra hold\n",
106
+ "597 598 zebra pet\n",
107
+ "598 599 zebra watch\n",
108
+ "599 600 zebra no_interaction\n",
109
+ "\n",
110
+ "[600 rows x 3 columns]\n"
111
+ ]
112
+ }
113
+ ],
114
+ "source": [
115
+ "\n",
116
+ "hico_annot = json.load(open('/ps/project/datasets/HICO/hico-image-level/hico-training-set-image-level.json', 'rb'))\n",
117
+ "hoi_mapping = pd.read_csv('/ps/project/datasets/HICO/hico-image-level/hico_hoi_list.txt', header=None, delim_whitespace=True)\n",
118
+ "print(hoi_mapping)"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 9,
124
+ "metadata": {
125
+ "scrolled": true
126
+ },
127
+ "outputs": [
128
+ {
129
+ "name": "stdout",
130
+ "output_type": "stream",
131
+ "text": [
132
+ "Final number of images 3154\n"
133
+ ]
134
+ }
135
+ ],
136
+ "source": [
137
+ "version = '1'\n",
138
+ "out_dir = f'./filtered_data/v_{version}'\n",
139
+ "os.makedirs(out_dir, exist_ok=True)\n",
140
+ "\n",
141
+ "objectwise_img_names = {}\n",
142
+ "imgwise_object_names = {}\n",
143
+ "img_dir = '/ps/project/datasets/HICO/hico_20150920/images/train2015'\n",
144
+ "\n",
145
+ "bad_object_names = ['bear', 'bird', 'cat', 'cow', \n",
146
+ " 'dog', 'elephant', 'giraffe', 'horse', \n",
147
+ " 'mouse', 'person', 'sheep', 'zebra' ]\n",
148
+ "bad_action_names = ['buy', 'chase', 'direct', 'greet', 'herd', 'hose',\n",
149
+ " 'hug', 'hunt', 'milk', 'no_interaction', 'pet', 'point', 'teach',\n",
150
+ " 'watch', 'wave']\n",
151
+ "\n",
152
+ "for i, (img_name, img_md) in enumerate(hico_annot.items()):\n",
153
+ " \n",
154
+ " # Apply keypoint number filtering on the images \n",
155
+ " full_img_name = 'hake_train2015_' + img_name\n",
156
+ " if full_img_name not in filter_img_names.keys():\n",
157
+ " continue\n",
158
+ " \n",
159
+ " # show the image\n",
160
+ " if i < 0:\n",
161
+ " img = Image.open(osp.join(img_dir,img_name))\n",
162
+ " display(img)\n",
163
+ " \n",
164
+ " obj_names = []\n",
165
+ " action_names = []\n",
166
+ " kp_num = filter_img_names[full_img_name]\n",
167
+ " \n",
168
+ " # travel through all hoi in the metadata, save obj_names and action_names for the hois\n",
169
+ " for hoi_id in img_md['hoi_id']:\n",
170
+ " img_md_row = hoi_mapping.loc[hoi_mapping[0] == hoi_id].iloc[0]\n",
171
+ "\n",
172
+ " obj_name = img_md_row[1]\n",
173
+ " if obj_name in bad_object_names:\n",
174
+ " continue\n",
175
+ " action_name = img_md_row[2]\n",
176
+ " if action_name in bad_action_names:\n",
177
+ " continue\n",
178
+ " \n",
179
+ " obj_names.append(obj_name)\n",
180
+ " action_names.append(action_name)\n",
181
+ " \n",
182
+ " if len(set(obj_names)) == 0 or len(action_names) == 0:\n",
183
+ " continue\n",
184
+ " \n",
185
+ " imgwise_object_names.setdefault(full_img_name,[]).extend(list(set(obj_names)))\n",
186
+ " \n",
187
+ "# # # Display images with multiple objects\n",
188
+ "# if len(set(obj_names)) > 1:\n",
189
+ "# print(img_name)\n",
190
+ "# print(obj_names)\n",
191
+ "# print(action_names)\n",
192
+ "# print(f'Number of Kps: {kp_num}')\n",
193
+ "# img = Image.open(osp.join(img_dir,img_name))\n",
194
+ "# display(img)\n",
195
+ " \n",
196
+ " for obj_name in set(obj_names):\n",
197
+ " objectwise_img_names.setdefault(obj_name,[]).append(full_img_name)\n",
198
+ " \n",
199
+ "print(f'Final number of images {len(imgwise_object_names)}')"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": null,
205
+ "metadata": {
206
+ "scrolled": true
207
+ },
208
+ "outputs": [],
209
+ "source": [
210
+ "# Save the imagewise_object dict\n",
211
+ "out_path = osp.join(out_dir, 'object_per_image_dict.json')\n",
212
+ "with open(out_path, 'w') as fp:\n",
213
+ " json.dump(imgwise_object_names, fp)\n",
214
+ " print(f'saved at {out_path}')\n",
215
+ " \n",
216
+ "# # save image_list \n",
217
+ "# out_path = osp.join(out_dir, 'hico_imglist_all_140223.txt')\n",
218
+ "# with open(out_path, 'w') as f:\n",
219
+ "# f.write('\\n'.join(imgwise_object_names.keys()))\n",
220
+ "# print(f'saved at {out_path}')\n",
221
+ "\n",
222
+ "\n",
223
+ "# Save the object_wise dict\n",
224
+ "out_path = osp.join(out_dir, 'imgnames_per_object_dict.json')\n",
225
+ "with open(out_path, 'w') as fp:\n",
226
+ " json.dump(objectwise_img_names, fp)\n",
227
+ " print(f'saved at {out_path}')\n",
228
+ "\n",
229
+ " \n",
230
+ "\n",
231
+ " "
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": null,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": [
240
+ "import matplotlib.pyplot as plt\n",
241
+ "import seaborn as sns\n",
242
+ "\n",
243
+ "# sort the dictionary \n",
244
+ "objectwise_img_names = dict(sorted(objectwise_img_names.items(), key=lambda x: len(x[1]), reverse=True))\n",
245
+ "\n",
246
+ "# Extract object names and image counts\n",
247
+ "obj_names = list(objectwise_img_names.keys())\n",
248
+ "img_counts = [len(objectwise_img_names[obj]) for obj in objectwise_img_names]\n",
249
+ "print(sum(img_counts))\n",
250
+ "\n",
251
+ "# Create bar plot\n",
252
+ "sns.barplot(x=obj_names, y=img_counts)\n",
253
+ "\n",
254
+ "# Add x-axis and y-axis labels\n",
255
+ "plt.xlabel('Object')\n",
256
+ "plt.ylabel('Number of Images')\n",
257
+ "\n",
258
+ "plt.xticks(rotation=45, ha='right', fontsize=3)\n",
259
+ "\n",
260
+ "# Save the plot as a high-resolution image file\n",
261
+ "out_path = osp.join(out_dir, 'image_per_object_category.png')\n",
262
+ "plt.savefig(out_path, dpi=300)\n",
263
+ "\n",
264
+ "# Show plot\n",
265
+ "plt.show()"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": null,
271
+ "metadata": {},
272
+ "outputs": [],
273
+ "source": [
274
+ "\n",
275
+ " "
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": null,
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": []
284
+ }
285
+ ],
286
+ "metadata": {
287
+ "kernelspec": {
288
+ "display_name": "Python 3",
289
+ "language": "python",
290
+ "name": "python3"
291
+ },
292
+ "language_info": {
293
+ "codemirror_mode": {
294
+ "name": "ipython",
295
+ "version": 3
296
+ },
297
+ "file_extension": ".py",
298
+ "mimetype": "text/x-python",
299
+ "name": "python",
300
+ "nbconvert_exporter": "python",
301
+ "pygments_lexer": "ipython3",
302
+ "version": "3.8.5"
303
+ }
304
+ },
305
+ "nbformat": 4,
306
+ "nbformat_minor": 4
307
+ }
hot_analysis/.ipynb_checkpoints/vcoco_analysis-checkpoint.ipynb ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 30,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import json\n",
10
+ "import pandas as pd \n",
11
+ "import ipdb\n",
12
+ "import os\n",
13
+ "import pickle as pkl\n",
14
+ "import os.path as osp\n",
15
+ "import numpy as np\n",
16
+ "from PIL import Image"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 31,
22
+ "metadata": {},
23
+ "outputs": [
24
+ {
25
+ "name": "stdout",
26
+ "output_type": "stream",
27
+ "text": [
28
+ "Total images found: 5459\n",
29
+ "Images after KP filtering: 2386\n",
30
+ "['vcoco_000000051899.jpg', 'vcoco_000000093127.jpg', 'vcoco_000000455665.jpg', 'vcoco_000000248389.jpg', 'vcoco_000000403947.jpg']\n"
31
+ ]
32
+ }
33
+ ],
34
+ "source": [
35
+ "# Load Agniv VITpose-base hot dict\n",
36
+ "filter_path = \"./agniv_pose_filter/hot_dict.pkl\"\n",
37
+ "with open(filter_path, 'rb') as f:\n",
38
+ " pose_md_dict = pkl.load(f)\n",
39
+ " \n",
40
+ "vcoco_dict = {}\n",
41
+ "\n",
42
+ "for k, v in pose_md_dict.items():\n",
43
+ " if 'vcoco' in k:\n",
44
+ " vcoco_dict[k] = v\n",
45
+ " \n",
46
+ "print(f'Total images found: {len(vcoco_dict)}')\n",
47
+ "\n",
48
+ "# Filter out images with < 10 visible keypoints \n",
49
+ "kp_thresh = 10\n",
50
+ "\n",
51
+ "filter_img_names = {}\n",
52
+ "\n",
53
+ "for imgname, kp_md in vcoco_dict.items():\n",
54
+ " if kp_md == 0:\n",
55
+ " continue\n",
56
+ " if kp_md[\"num_persons\"] == 1 and kp_md[\"num_kpt\"][0.5][0] > kp_thresh:\n",
57
+ " filter_img_names[imgname] = kp_md[\"num_kpt\"][0.5][0]\n",
58
+ " \n",
59
+ "print(f'Images after KP filtering: {len(filter_img_names)}')\n",
60
+ "\n",
61
+ "print(list(filter_img_names.keys())[:5])"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 35,
67
+ "metadata": {},
68
+ "outputs": [
69
+ {
70
+ "name": "stdout",
71
+ "output_type": "stream",
72
+ "text": [
73
+ "loading annotations into memory...\n",
74
+ "Done (t=1.30s)\n",
75
+ "creating index...\n",
76
+ "index created!\n",
77
+ "Final number of images 2368\n"
78
+ ]
79
+ }
80
+ ],
81
+ "source": [
82
+ "from pycocotools.coco import COCO\n",
83
+ "version = '1'\n",
84
+ "out_dir = f'./filtered_data/v_{version}/vcoco'\n",
85
+ "os.makedirs(out_dir, exist_ok=True)\n",
86
+ "\n",
87
+ "coco_dir = '/is/cluster/work/stripathi/pycharm_remote/v-coco/data'\n",
88
+ "coco = COCO(os.path.join(coco_dir, 'instances_vcoco_all_2014.json'))\n",
89
+ "\n",
90
+ "imgwise_object_names = {}\n",
91
+ "img_dir = '/ps/project/datasets/HOT/Contact_Data/images/training/'\n",
92
+ "\n",
93
+ "bad_object_names = ['bird', 'cat', 'dog', 'horse', 'sheep', 'cow', \n",
94
+ " 'elephant', 'bear', 'zebra', 'giraffe']\n",
95
+ "\n",
96
+ "# # Get all coco objects\n",
97
+ "# count = 0\n",
98
+ "# for id in range(91):\n",
99
+ "# try: \n",
100
+ "# print(coco.loadCats(id))\n",
101
+ "# count += 1\n",
102
+ "# except:\n",
103
+ "# print(f'ID:{id} does not exist')\n",
104
+ "# continue\n",
105
+ "# print(count)\n",
106
+ "\n",
107
+ "objectwise_img_names = {}\n",
108
+ "for img_name in filter_img_names.keys():\n",
109
+ " img_id = int(os.path.splitext(img_name.split('_')[-1])[0])\n",
110
+ " ann_ids = coco.getAnnIds([img_id])\n",
111
+ " anns = coco.loadAnns(ann_ids)\n",
112
+ " object_list = []\n",
113
+ " for ann in anns:\n",
114
+ " object_cat = coco.loadCats(ann[\"category_id\"])\n",
115
+ " if len(object_cat) > 1:\n",
116
+ " ipdb.set_trace()\n",
117
+ " object_name = object_cat[0]['name']\n",
118
+ " if object_name in bad_object_names:\n",
119
+ " continue\n",
120
+ " if object_name != 'person':\n",
121
+ " object_list.append(object_name)\n",
122
+ " if len(object_list) != 0:\n",
123
+ " imgwise_object_names[img_name] = list(set(object_list))\n",
124
+ " \n",
125
+ " for obj_name in set(object_list):\n",
126
+ " objectwise_img_names.setdefault(obj_name,[]).append(img_name)\n",
127
+ " \n",
128
+ "# # Visualize images \n",
129
+ "# print(img_name)\n",
130
+ "# print(list(set(object_list)))\n",
131
+ "# img = Image.open(osp.join(img_dir,img_name))\n",
132
+ "# display(img)\n",
133
+ " \n",
134
+ " \n",
135
+ "print(f'Final number of images {len(imgwise_object_names)}')\n",
136
+ " \n"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": 36,
142
+ "metadata": {
143
+ "scrolled": true
144
+ },
145
+ "outputs": [
146
+ {
147
+ "name": "stdout",
148
+ "output_type": "stream",
149
+ "text": [
150
+ "saved at ./filtered_data/v_1/vcoco/object_per_image_dict.json\n",
151
+ "saved at ./filtered_data/v_1/vcoco/vcoco_imglist_all_170223.txt\n",
152
+ "saved at ./filtered_data/v_1/vcoco/imgnames_per_object_dict.json\n"
153
+ ]
154
+ }
155
+ ],
156
+ "source": [
157
+ "# Save the imagewise_object dict\n",
158
+ "out_path = osp.join(out_dir, 'object_per_image_dict.json')\n",
159
+ "with open(out_path, 'w') as fp:\n",
160
+ " json.dump(imgwise_object_names, fp)\n",
161
+ " print(f'saved at {out_path}')\n",
162
+ " \n",
163
+ "# save image_list \n",
164
+ "out_path = osp.join(out_dir, 'vcoco_imglist_all_170223.txt')\n",
165
+ "with open(out_path, 'w') as f:\n",
166
+ " f.write('\\n'.join(imgwise_object_names.keys()))\n",
167
+ " print(f'saved at {out_path}')\n",
168
+ "\n",
169
+ "\n",
170
+ "# Save the object_wise dict\n",
171
+ "out_path = osp.join(out_dir, 'imgnames_per_object_dict.json')\n",
172
+ "with open(out_path, 'w') as fp:\n",
173
+ " json.dump(objectwise_img_names, fp)\n",
174
+ " print(f'saved at {out_path}')\n",
175
+ "\n",
176
+ " \n",
177
+ "\n",
178
+ " "
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": 37,
184
+ "metadata": {
185
+ "scrolled": false
186
+ },
187
+ "outputs": [
188
+ {
189
+ "name": "stdout",
190
+ "output_type": "stream",
191
+ "text": [
192
+ "5072\n"
193
+ ]
194
+ },
195
+ {
196
+ "data": {
197
+ "image/png": "\n",
198
+ "text/plain": [
199
+ "<Figure size 432x288 with 1 Axes>"
200
+ ]
201
+ },
202
+ "metadata": {
203
+ "needs_background": "light"
204
+ },
205
+ "output_type": "display_data"
206
+ }
207
+ ],
208
+ "source": [
209
+ "import matplotlib.pyplot as plt\n",
210
+ "import seaborn as sns\n",
211
+ "\n",
212
+ "# sort the dictionary \n",
213
+ "objectwise_img_names = dict(sorted(objectwise_img_names.items(), key=lambda x: len(x[1]), reverse=True))\n",
214
+ "\n",
215
+ "# Extract object names and image counts\n",
216
+ "obj_names = list(objectwise_img_names.keys())\n",
217
+ "img_counts = [len(objectwise_img_names[obj]) for obj in objectwise_img_names]\n",
218
+ "print(sum(img_counts))\n",
219
+ "\n",
220
+ "# Create bar plot\n",
221
+ "sns.barplot(x=obj_names, y=img_counts)\n",
222
+ "\n",
223
+ "# Add x-axis and y-axis labels\n",
224
+ "plt.xlabel('Object')\n",
225
+ "plt.ylabel('Number of Images')\n",
226
+ "\n",
227
+ "plt.xticks(rotation=45, ha='right', fontsize=3)\n",
228
+ "\n",
229
+ "# Save the plot as a high-resolution image file\n",
230
+ "out_path = osp.join(out_dir, 'image_per_object_category.png')\n",
231
+ "plt.savefig(out_path, dpi=300)\n",
232
+ "\n",
233
+ "# Show plot\n",
234
+ "plt.show()"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": null,
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": [
243
+ "\n",
244
+ " "
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": null,
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": []
253
+ }
254
+ ],
255
+ "metadata": {
256
+ "kernelspec": {
257
+ "display_name": "Python 3",
258
+ "language": "python",
259
+ "name": "python3"
260
+ },
261
+ "language_info": {
262
+ "codemirror_mode": {
263
+ "name": "ipython",
264
+ "version": 3
265
+ },
266
+ "file_extension": ".py",
267
+ "mimetype": "text/x-python",
268
+ "name": "python",
269
+ "nbconvert_exporter": "python",
270
+ "pygments_lexer": "ipython3",
271
+ "version": "3.8.5"
272
+ }
273
+ },
274
+ "nbformat": 4,
275
+ "nbformat_minor": 4
276
+ }
hot_analysis/agniv_pose_filter/hico.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01acf6e31dad34eb1e48277e227ea0668f17905f55eae44b640cc1d14a9e41c
3
+ size 520245
hot_analysis/agniv_pose_filter/hot.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64872b84670cb2ec643f1649e1a7926aed950adcb8f6f952916924bc4ab10f37
3
+ size 731363
hot_analysis/agniv_pose_filter/hot_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c0a6e35bf87e98327189ef8d732aa19b0d1c85c963c1ee348e1e466864845e
3
+ size 3679000
hot_analysis/agniv_pose_filter/pq_wnp.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cad59d5aaf077141d60a492c3784d8994d53f4dfd020f9eb4e11b3e430ef65d
3
+ size 25433
hot_analysis/agniv_pose_filter/vcoco.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14cc34940ff4a61cfc952d8c5b98e0e7b1a58392177198fa4589f8b2bcead02a
3
+ size 184801
hot_analysis/count_objects_per_img.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # get average number of objects per image
2
+ import os.path as osp
3
+ import json
4
+ import plotly.express as px
5
+ import plotly.io as pio
6
+
7
def average_objects_per_image(imgwise_obj_dict):
    """Return the mean number of objects listed per image.

    Args:
        imgwise_obj_dict: mapping from image name to a list of object names.

    Returns:
        The average list length over all images as a float, or 0.0 for an
        empty mapping (instead of raising ZeroDivisionError).
    """
    if not imgwise_obj_dict:
        return 0.0
    total_objects = sum(len(objects) for objects in imgwise_obj_dict.values())
    return total_objects / len(imgwise_obj_dict)


def load_json(path):
    """Read and return the JSON document stored at ``path``."""
    with open(path, 'r') as fp:
        return json.load(fp)


def main():
    """Print the average object count per image for HICO, V-COCO and both combined."""
    version = '1'
    # `base_dir` rather than `dir`, which would shadow the builtin
    base_dir = '/is/cluster/work/stripathi/pycharm_remote/dca_contact/hot_analysis/'
    out_dir_hico = osp.join(base_dir, f'filtered_data/v_{version}/hico')
    out_dir_vcoco = osp.join(base_dir, f'filtered_data/v_{version}/vcoco')

    imgwise_obj_dict_hico = load_json(osp.join(out_dir_hico, 'object_per_image_dict.json'))
    imgwise_obj_dict_vcoco = load_json(osp.join(out_dir_vcoco, 'object_per_image_dict.json'))

    # combine the dicts (V-COCO entries win on duplicate image names)
    imgwise_obj_dict = {**imgwise_obj_dict_hico, **imgwise_obj_dict_vcoco}

    # get average length of the object list over all images
    avg_obj_per_img = average_objects_per_image(imgwise_obj_dict)
    print(f'Average number of objects per image: {avg_obj_per_img}')

    # get average separately for hico and vcoco
    avg_obj_per_img_hico = average_objects_per_image(imgwise_obj_dict_hico)
    print(f'Average number of objects per image in hico: {avg_obj_per_img_hico}')

    avg_obj_per_img_vcoco = average_objects_per_image(imgwise_obj_dict_vcoco)
    print(f'Average number of objects per image in vcoco: {avg_obj_per_img_vcoco}')


if __name__ == '__main__':
    main()
34
+
35
+
hot_analysis/create_combined_objectwise_plots.ipynb ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 22,
6
+ "id": "1738c3a5",
7
+ "metadata": {
8
+ "scrolled": true
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "import os.path as osp\n",
13
+ "import json\n",
14
+ "import os\n",
15
+ "\n",
16
+ "version = '1'\n",
17
+ "out_dir_hico = f'./filtered_data/v_{version}/hico'\n",
18
+ "out_dir_vcoco = f'./filtered_data/v_{version}/vcoco'\n",
19
+ "out_dir_phosa = f'./filtered_data/v_{version}/phosa_split'\n",
20
+ "out_dir_behave = f'./filtered_data/v_{version}/behave_split'\n",
21
+ "\n",
22
+ "objectwise_img_dict_hico = osp.join(out_dir_hico, 'imgnames_per_object_dict.json')\n",
23
+ "objectwise_img_dict_vcoco = osp.join(out_dir_vcoco, 'imgnames_per_object_dict.json')\n",
24
+ "\n",
25
+ "with open(objectwise_img_dict_hico, 'r') as fp:\n",
26
+ " objectwise_img_dict_hico = json.load(fp)\n",
27
+ "with open(objectwise_img_dict_vcoco, 'r') as fp:\n",
28
+ " objectwise_img_dict_vcoco = json.load(fp)\n",
29
+ "\n",
30
+ "# sort the dictionary \n",
31
+ "objectwise_img_names1 = dict(sorted(objectwise_img_dict_hico.items(), key=lambda x: len(x[1]), reverse=True))\n",
32
+ "\n",
33
+ "# # Extract object names and image counts\n",
34
+ "# obj_names1 = list(objectwise_img_names_hico.keys())\n",
35
+ "# print(sorted(obj_names1))\n",
36
+ "# image_count1 = [len(objectwise_img_names_hico[obj]) for obj in objectwise_img_names_hico]\n",
37
+ "# print(sum(image_count1))\n",
38
+ "\n",
39
+ "# sort the dictionary \n",
40
+ "objectwise_img_names2 = dict(sorted(objectwise_img_dict_vcoco.items(), key=lambda x: len(x[1]), reverse=True))\n",
41
+ "\n",
42
+ "# # Extract object names and image counts\n",
43
+ "# obj_names2 = list(objectwise_img_names_vcoco.keys())\n",
44
+ "# print(sorted(obj_names2))\n",
45
+ "# image_count2 = [len(objectwise_img_names_vcoco[obj]) for obj in objectwise_img_names_vcoco]\n",
46
+ "# print(sum(image_count2))\n",
47
+ "\n"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 18,
53
+ "id": "2d6ad5ed",
54
+ "metadata": {},
55
+ "outputs": [
56
+ {
57
+ "name": "stdout",
58
+ "output_type": "stream",
59
+ "text": [
60
+ "2119\n",
61
+ "saved at ./filtered_data/v_1/phosa_split/imgnames_per_object_dict.json\n"
62
+ ]
63
+ }
64
+ ],
65
+ "source": [
66
+ "# Extract image list for object in PHOSA\n",
67
+ "def merge_dictionaries(dict1, dict2):\n",
68
+ " merged_dict = {}\n",
69
+ "\n",
70
+ " # Merge values for common keys\n",
71
+ " for key in set(dict1.keys()) | set(dict2.keys()):\n",
72
+ " merged_dict[key] = list(set(dict1.get(key, [])) | set(dict2.get(key, [])))\n",
73
+ "\n",
74
+ " return merged_dict\n",
75
+ "\n",
76
+ "def filter_dictionary(dictionary, keys):\n",
77
+ " filtered_dict = {key: dictionary[key] for key in keys if key in dictionary}\n",
78
+ " return filtered_dict\n",
79
+ "\n",
80
+ "phosa_object_names = [\"bat\", \"bench\", \"bicycle\", \"laptop\", \"motorcycle\", \"skateboard\", \"surfboard\", \"tennis\"]\n",
81
+ "combined_objectwise_img_names = merge_dictionaries(objectwise_img_names1, objectwise_img_names2)\n",
82
+ "phosa_objectwise_img_names = filter_dictionary(combined_objectwise_img_names, phosa_object_names)\n",
83
+ "net_img_count = 0\n",
84
+ "for v in phosa_objectwise_img_names.values():\n",
85
+ " net_img_count += len(v)\n",
86
+ "print(net_img_count)\n",
87
+ "# Save the object_wise dict\n",
88
+ "os.makedirs(out_dir_phosa, exist_ok=True)\n",
89
+ "out_path_phosa = osp.join(out_dir_phosa, 'imgnames_per_object_dict.json')\n",
90
+ "with open(out_path_phosa, 'w') as fp:\n",
91
+ " json.dump(phosa_objectwise_img_names, fp)\n",
92
+ " print(f'saved at {out_path_phosa}')\n",
93
+ " \n",
94
+ " "
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 23,
100
+ "id": "3f2a5e62",
101
+ "metadata": {},
102
+ "outputs": [
103
+ {
104
+ "name": "stdout",
105
+ "output_type": "stream",
106
+ "text": [
107
+ "saved at ./filtered_data/v_1/behave_split/imgnames_per_object_dict.json\n"
108
+ ]
109
+ }
110
+ ],
111
+ "source": [
112
+ "# Extract image list for object in CHORE\n",
113
+ "\n",
114
+ "behave_object_names_original = [\"backpack\", \"basketball\", \"boxlarge\", \"boxlong\", \"boxmedium\", \"boxsmall\", \"boxtiny\", \"chairblack\", \"chairwood\", \"keyboard\", \"monitor\", \"plasticcontainer\", \"stool\", \"suitcase\", \"tablesmall\", \"tablesquare\", \"toolbox\", \"trashbin\", \"yogaball\", \"yogamat\"]\n",
115
+ "behave_object_names_in_hot = [\"backpack\", \"chair\", \"keyboard\", \"suitcase\"]\n",
116
+ "\n",
117
+ "combined_objectwise_img_names = merge_dictionaries(objectwise_img_names1, objectwise_img_names2)\n",
118
+ "behave_objectwise_img_names = filter_dictionary(combined_objectwise_img_names, behave_object_names_in_hot)\n",
119
+ "net_img_count = 0\n",
120
+ "for v in behave_objectwise_img_names.values():\n",
121
+ " net_img_count += len(v)\n",
122
+ "\n",
123
+ "# Save the object_wise dict\n",
124
+ "os.makedirs(out_dir_behave, exist_ok=True)\n",
125
+ "out_path_behave = osp.join(out_dir_behave, 'imgnames_per_object_dict.json')\n",
126
+ "with open(out_path_behave, 'w') as fp:\n",
127
+ " json.dump(behave_objectwise_img_names, fp)\n",
128
+ " print(f'saved at {out_path_behave}')"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 3,
134
+ "id": "730e9ac2",
135
+ "metadata": {},
136
+ "outputs": [
137
+ {
138
+ "ename": "ModuleNotFoundError",
139
+ "evalue": "No module named 'seaborn'",
140
+ "output_type": "error",
141
+ "traceback": [
142
+ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
143
+ "\u001B[0;31mModuleNotFoundError\u001B[0m Traceback (most recent call last)",
144
+ "Cell \u001B[0;32mIn[3], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mmatplotlib\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mpyplot\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mplt\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mseaborn\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01msns\u001B[39;00m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;66;03m# sort the dictionary \u001B[39;00m\n\u001B[1;32m 5\u001B[0m objectwise_img_names1 \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mdict\u001B[39m(\u001B[38;5;28msorted\u001B[39m(objectwise_img_names1\u001B[38;5;241m.\u001B[39mitems(), key\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mlambda\u001B[39;00m x: \u001B[38;5;28mlen\u001B[39m(x[\u001B[38;5;241m1\u001B[39m]), reverse\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m))\n",
145
+ "\u001B[0;31mModuleNotFoundError\u001B[0m: No module named 'seaborn'"
146
+ ]
147
+ }
148
+ ],
149
+ "source": [
150
+ "import matplotlib.pyplot as plt\n",
151
+ "import seaborn as sns\n",
152
+ "\n",
153
+ "# sort the dictionary \n",
154
+ "objectwise_img_names1 = dict(sorted(objectwise_img_names1.items(), key=lambda x: len(x[1]), reverse=True))\n",
155
+ "\n",
156
+ "# Extract object names and image counts\n",
157
+ "obj_names1 = list(objectwise_img_names1.keys())\n",
158
+ "print(obj_names1)\n",
159
+ "img_counts1 = [len(objectwise_img_names1[obj]) for obj in objectwise_img_names1]\n",
160
+ "print(sum(img_counts1))\n",
161
+ "\n",
162
+ "# sort the dictionary \n",
163
+ "objectwise_img_names2 = dict(sorted(objectwise_img_names2.items(), key=lambda x: len(x[1]), reverse=True))\n",
164
+ "\n",
165
+ "# Extract object names and image counts\n",
166
+ "obj_names2 = list(objectwise_img_names2.keys())\n",
167
+ "print(obj_names2)\n",
168
+ "img_counts2 = [len(objectwise_img_names2[obj]) for obj in objectwise_img_names2]\n",
169
+ "print(sum(img_counts2))\n",
170
+ "\n",
171
+ "# Create a figure with two subplots\n",
172
+ "fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(10, 8))\n",
173
+ "\n",
174
+ "# Plot the first graph in the first subplot\n",
175
+ "sns.barplot(x=obj_names1, y=img_counts1, ax=axs[0])\n",
176
+ "\n",
177
+ "# Add y-axis label to the first subplot\n",
178
+ "axs[0].set_ylabel('Number of Images')\n",
179
+ "\n",
180
+ "# Set the rotation and alignment of x-tick labels in the first subplot\n",
181
+ "axs[0].tick_params(axis='x', rotation=45)\n",
182
+ "\n",
183
+ "# Plot the second graph in the second subplot\n",
184
+ "sns.barplot(x=obj_names2, y=img_counts2, ax=axs[1])\n",
185
+ "\n",
186
+ "# Add x-axis and y-axis labels to the second subplot\n",
187
+ "axs[1].set_xlabel('Object')\n",
188
+ "axs[1].set_ylabel('Number of Images')\n",
189
+ "\n",
190
+ "# Set the rotation and alignment of x-tick labels in the second subplot\n",
191
+ "axs[1].tick_params(axis='x', rotation=45)\n",
192
+ "\n",
193
+ "# Save the plot as a high-resolution image file\n",
194
+ "out_path = osp.join(out_dir, 'image_per_object_category.png')\n",
195
+ "plt.savefig(out_path, dpi=300)\n",
196
+ "\n",
197
+ "# Show the plot\n",
198
+ "plt.show()\n",
199
+ "\n"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": 6,
205
+ "id": "23c503dd",
206
+ "metadata": {},
207
+ "outputs": [
208
+ {
209
+ "name": "stderr",
210
+ "output_type": "stream",
211
+ "text": [
212
+ "/tmp/ipykernel_135310/1442551168.py:22: UserWarning: FixedFormatter should only be used together with FixedLocator\n",
213
+ " ax.set_xticklabels(X1)\n"
214
+ ]
215
+ },
216
+ {
217
+ "data": {
218
+ "image/png": "\n",
219
+ "text/plain": [
220
+ "<Figure size 1000x800 with 1 Axes>"
221
+ ]
222
+ },
223
+ "metadata": {},
224
+ "output_type": "display_data"
225
+ }
226
+ ],
227
+ "source": [
228
+ "import matplotlib\n",
229
+ "import matplotlib.pyplot as plt\n",
230
+ "import numpy as np\n",
231
+ "import seaborn as sns\n",
232
+ "\n",
233
+ "# Create data\n",
234
+ "X1 = ['A', 'B', 'C', 'D', 'E']\n",
235
+ "Y1 = [10, 15, 20, 25, 30]\n",
236
+ "Y2 = [20, 25, 30, 35, 40]\n",
237
+ "\n",
238
+ "# Set color scheme and font\n",
239
+ "colors = sns.color_palette(\"Blues\", 2)\n",
240
+ "\n",
241
+ "# Create bar plot\n",
242
+ "fig, ax = plt.subplots(figsize=(10, 8))\n",
243
+ "ax.bar(X1, Y1, color=colors[0], edgecolor='white', linewidth=1)\n",
244
+ "ax.bar(X1, Y2, bottom=Y1, color=colors[1], edgecolor='white', linewidth=1)\n",
245
+ "\n",
246
+ "# Set axis labels and ticks\n",
247
+ "ax.set_xlabel('X1')\n",
248
+ "ax.set_ylabel('Y')\n",
249
+ "ax.set_xticklabels(X1)\n",
250
+ "ax.tick_params(axis='both', which='major', labelsize=14)\n",
251
+ "ax.grid(axis='y', alpha=0.4)\n",
252
+ "\n",
253
+ "# Add legend and title\n",
254
+ "ax.legend(['Y1', 'Y2'], loc='upper left')\n",
255
+ "ax.set_title('Vertical stacked bar graph of Y1 and Y2')\n",
256
+ "\n",
257
+ "# Save the graph as PDF or PNG\n",
258
+ "plt.savefig('vertical_stacked_bar_graph.png', dpi=300, bbox_inches='tight')"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": null,
264
+ "id": "4c055ec7",
265
+ "metadata": {},
266
+ "outputs": [],
267
+ "source": []
268
+ }
269
+ ],
270
+ "metadata": {
271
+ "kernelspec": {
272
+ "display_name": "Python 3 (ipykernel)",
273
+ "language": "python",
274
+ "name": "python3"
275
+ },
276
+ "language_info": {
277
+ "codemirror_mode": {
278
+ "name": "ipython",
279
+ "version": 3
280
+ },
281
+ "file_extension": ".py",
282
+ "mimetype": "text/x-python",
283
+ "name": "python",
284
+ "nbconvert_exporter": "python",
285
+ "pygments_lexer": "ipython3",
286
+ "version": "3.8.5"
287
+ }
288
+ },
289
+ "nbformat": 4,
290
+ "nbformat_minor": 5
291
+ }
hot_analysis/create_part_probability_mesh.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import json
4
+ import trimesh
5
+ import seaborn as sns
6
+
7
+
8
# Build a per-vertex contact-probability colouring of the SMPL template mesh
# from the combined trainval + test contact labels and export it as an .obj.

# Number of vertices of the template mesh used throughout this script.
num_verts = 6890

# Load the combined dca train, val and test npzs
# (`data_dir` instead of `dir`, so the builtin `dir` is not shadowed)
data_dir = '/is/cluster/work/stripathi/pycharm_remote/dca_contact/data/dataset_extras'
trainval_npz = np.load(os.path.join(data_dir, 'hot_dca_trainval.npz'), allow_pickle=True)
test_npz = np.load(os.path.join(data_dir, 'hot_dca_test.npz'), allow_pickle=True)

# combine the two npz: every key of trainval is concatenated with the same key of test
combined_npz = {
    key: np.concatenate([trainval_npz[key], test_npz[key]], axis=0)
    for key in trainval_npz.keys()
}

segmentation_path = 'data/smpl_vert_segmentation.json'
with open(segmentation_path, 'rb') as f:
    part_segmentation = json.load(f)

# Fine-grained parts (values) that are folded into a coarser parent part (key).
combine_keys = {
    'leftFoot': ['leftToeBase'],
    'rightFoot': ['rightToeBase'],
    'leftHand': ['leftHandIndex1'],
    'rightHand': ['rightHandIndex1'],
    'spine': ['spine1', 'spine2'],
    'head': ['neck'],
}

for key, subkeys in combine_keys.items():
    for subkey in subkeys:
        part_segmentation[key] += part_segmentation[subkey]
        del part_segmentation[subkey]

# reverse the part segmentation: vertex id -> part name
# (only consumed by the disabled per-part variant at the bottom)
part_segmentation_rev = {}
for part, part_verts in part_segmentation.items():
    for vert in part_verts:
        part_segmentation_rev[vert] = part

# count the number of contact instances per vertex
per_vert_contact_count = np.zeros(num_verts)
for cls in combined_npz['contact_label']:
    per_vert_contact_count += cls

# calculate the maximum contact count per part
# (only consumed by the disabled per-part variant at the bottom)
part_contact_max = {
    part: np.max(per_vert_contact_count[part_verts])
    for part, part_verts in part_segmentation.items()
}

# calculate the contact probability globally.
# The global maximum is computed once (it used to be recomputed with the
# Python builtin `max` for every vertex) and the normalisation is vectorised.
# The 0.3 exponent compresses the range so rarely touched vertices stay visible.
max_contact_count = per_vert_contact_count.max()
if max_contact_count > 0:
    contact_prob = (per_vert_contact_count / max_contact_count) ** 0.3
else:
    # no contact anywhere: avoid 0/0 and keep an all-zero probability
    contact_prob = np.zeros(num_verts)

# save the contact probability mesh
outdir = "/is/cluster/work/stripathi/pycharm_remote/dca_contact/hot_analysis"

# load template smpl mesh
mesh = trimesh.load_mesh('data/smpl/smpl_neutral_tpose.ply')
vertex_colors = trimesh.visual.interpolate(contact_prob, 'jet')
# set the vertex colors of the mesh
mesh.visual.vertex_colors = vertex_colors
# save the mesh
out_path = os.path.join(outdir, "contact_probability_mesh.obj")
mesh.export(out_path)

# Disabled variant: normalise per body part instead of globally.
# # calculate the contact probability per part
# contact_prob = np.zeros(6890)
# for vid in range(6890):
#     if 'Hand' in part_segmentation_rev[vid]:
#         contact_prob[vid] = (per_vert_contact_count[vid] / part_contact_max[part_segmentation_rev[vid]]) ** 0.4 if 'Hand' not in part_segmentation_rev[vid] else (per_vert_contact_count[vid] / part_contact_max[part_segmentation_rev[vid]]) ** 0.8
#
# # save the contact probability mesh
# outdir = "/is/cluster/work/stripathi/pycharm_remote/dca_contact/hot_analysis"
#
# # load template smpl mesh
# mesh = trimesh.load_mesh('data/smpl/smpl_neutral_tpose.ply')
# vertex_colors = trimesh.visual.interpolate(contact_prob, 'jet')
# # set the vertex colors of the mesh
# mesh.visual.vertex_colors = vertex_colors
# # save the mesh
# out_path = os.path.join(outdir, "contact_probability_mesh_part.obj")
# mesh.export(out_path)
84
+
85
+
86
+
hot_analysis/damon_qc_stats/compute_accuracy_iou_damon.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Load the AMT csv, go through each row's vertices, combine the vertices across all object labels and then compute the IoU with the GT from RICH and PROX (rows whose image name contains 'prox' are currently skipped).
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
# Input locations: the AMT annotation csv and the ground-truth contact npz.
csv_path = './quality_assurance_accuracy.csv'
gt_path = './qa_accuracy_gt_contact_combined.npz'

# Read both inputs; `df` holds the csv rows and `gt` is indexed by image name
# further down in this script.
df = pd.read_csv(csv_path)
gt = np.load(gt_path)
12
+
13
def compute_iou(pred_verts, gt_verts):
    """Return the intersection-over-union of two collections of vertex ids.

    Both inputs are treated as sets, so repeated ids in either collection do
    not inflate the union (the previous length-based formula counted them).

    Parameters
    ----------
    pred_verts : iterable of hashable vertex ids (annotated vertices)
    gt_verts : iterable of hashable vertex ids (ground-truth vertices)

    Returns
    -------
    0 when `pred_verts` is empty, otherwise |pred & gt| / |pred | gt| as a float.
    """
    pred_set = set(pred_verts)
    if not pred_set:
        # nothing was predicted: defined as zero overlap
        return 0

    gt_set = set(gt_verts)
    # the union is non-empty here because pred_set is non-empty
    return len(pred_set & gt_set) / len(pred_set | gt_set)
20
+
21
# Per-row IoU between the annotated vertices and the ground truth.
all_ious = []
for _, row in df.iterrows():
    # NOTE(security): eval() executes whatever expression the csv cell holds.
    # Only run this on a trusted csv; if the cells are plain Python literals,
    # ast.literal_eval is the safe replacement.
    annotation_dict = eval(row['vertices'])
    worker_id = row['WorkerId']

    # every row is expected to describe exactly one image
    imgnames = list(annotation_dict)
    assert len(imgnames) == 1
    imgname = imgnames[0]

    # Skip 'prox' images before touching the ground truth, so no lookup is
    # performed (and no KeyError can be raised) for rows that are dropped anyway.
    if 'prox' in imgname:
        continue

    # merge the vertex ids of all object labels, dropping repeated values
    combined_ids = set()
    for anno in annotation_dict.values():
        for ann in anno:
            # each `ann` maps an object label to its list of vertex ids
            for vert_ids in ann.values():
                combined_ids.update(vert_ids)
    combined_annotation_ids = list(combined_ids)

    # get gt for the imgname
    gt_ids = gt[imgname]

    # compute iou
    iou = compute_iou(combined_annotation_ids, gt_ids)
    print('worker id: ', worker_id, 'imgname: ', imgname, 'iou: ', iou)
    all_ious.append(iou)

# compute mean iou
mean_iou = np.mean(all_ious)
print('mean iou: ', mean_iou)
55
+
56
+
57
+
58
+
59
+
hot_analysis/damon_qc_stats/compute_fleiss_kappa_damon.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from scipy.stats import chi2
4
+
5
+
6
def correct_supporting(vertices, num_verts=6890):
    """Patch the 'SUPPORTING' annotations and binarize each image's contacts.

    Each element of `vertices` is a dict mapping an image name to a list of
    ``{label: [vertex ids]}`` dicts. For a few hard-coded images the vertex
    ids of one object are copied into the 'SUPPORTING' entry (the dropdown
    option was missing in the QA app). Afterwards the ids of *all* labels of
    an image are merged and replaced by a 0/1 array of length `num_verts`.

    The merge is a plain union over every annotation dict, so a label that
    occurs in more than one dict no longer overwrites its earlier ids.

    Parameters
    ----------
    vertices : list of dicts, modified in place
    num_verts : length of the binary array written per image (default 6890)

    Returns
    -------
    The same `vertices` list, with every image entry replaced by its array.
    """
    # image name -> object label whose contacts are copied into 'SUPPORTING'
    supporting_sources = {
        'hot/training/hake_train2015_HICO_train2015_00000019.jpg': 'BICYCLE',
        'hot/training/hake_train2015_HICO_train2015_00000020.jpg': 'SKATEBOARD',
        # NOTE(review): unlike the two keys above, this one has no '.jpg'
        # suffix -- confirm it matches the image names in the csv.
        'hot/training/hake_train2015_HICO_train2015_00000942': 'BENCH',
    }

    def add_supporting(verts, object_name):
        # Copy the vertex ids stored under `object_name` into the (last)
        # entry that has a 'SUPPORTING' key; no-op when there is none.
        object_vids = []
        supporting_entry = None
        for entry in verts:
            # each entry is expected to be a single-key dict
            for label, vids in entry.items():
                if label == object_name:
                    object_vids.extend(vids)
                if label == 'SUPPORTING':
                    supporting_entry = entry
        if supporting_entry is not None:
            supporting_entry['SUPPORTING'] += object_vids
        return verts

    for vert in vertices:
        # snapshot the items because the values are reassigned below
        for imgname, annotations in list(vert.items()):
            object_name = supporting_sources.get(imgname)
            if object_name is not None:
                # Since all labels are merged right below, this copy does not
                # change the resulting array; it is kept to preserve the
                # original correction step.
                annotations = add_supporting(annotations, object_name)

            # combine all vert_ids into a single set no matter the object
            vert_ids = set()
            for entry in annotations:
                for ids in entry.values():
                    vert_ids.update(ids)

            # binarize the ids to a numpy array
            v_np = np.zeros(num_verts)
            if vert_ids:
                v_np[list(vert_ids)] = 1
            vert[imgname] = v_np
    return vertices
47
+
48
def fleiss_kappa_per_img(vertices):
    """
    Compute Fleiss' kappa for a single image, treating every vertex as a
    subject rated by each annotator as 0 (not selected) or 1 (selected).

    Parameters
    ----------
    vertices : array-like of shape (n_raters, n_vertices); a list of equally
        long 0/1 arrays (e.g. shape (6890,)) or a 2-D array, where 1
        indicates a vertex is selected

    Returns
    -------
    kappa : float. The value is nan when the formula degenerates to 0/0
        (a single rater, or every rating in the same category).

    Raises
    ------
    ValueError : if the input is not 2-D or contains values other than 0/1
    """
    ratings = np.asarray(vertices)
    if ratings.ndim != 2:
        raise ValueError('vertices must have shape (n_raters, n_vertices)')

    n, N = ratings.shape  # n: number of raters, N: number of rated vertices
    k = 2  # number of categories

    # M[v, c] = number of raters that assigned category c to vertex v
    M = np.zeros((N, k))
    for i in range(k):
        M[:, i] = np.sum(ratings == i, axis=0)

    # every rating must have landed in one of the k categories
    if np.sum(M) != N * n:
        raise ValueError('vertices must only contain the values 0 and 1')

    # overall proportion of ratings per category
    p = np.sum(M, axis=0) / (N * n)
    # per-vertex agreement among the raters
    P = (np.sum(M * M, axis=1) - n) / (n * (n - 1))
    Pbar = np.mean(P)  # observed agreement
    PbarE = np.sum(p * p)  # agreement expected by chance

    # compute Fleiss' kappa
    kappa = (Pbar - PbarE) / (1 - PbarE)
    return kappa
76
+
77
def fleiss_kappa(data):
    """
    Compute Fleiss' kappa for every image and return the mean over images.

    The image names are taken from the first element of `data`; the
    per-image kappa is printed as it is computed.

    Parameters
    ----------
    data : list of dicts (one per rater) where keys are imgnames

    Returns
    -------
    mean of the per-image kappas
    """
    per_image_kappas = []
    for img in sorted(data[0].keys()):
        # stack the ratings of all raters for this image
        ratings = np.array([rater[img] for rater in data])
        kappa_img = fleiss_kappa_per_img(ratings)
        print(f'Fleiss\' Kappa for {img}: {kappa_img}')
        per_image_kappas.append(kappa_img)

    # average over all images
    return np.mean(per_image_kappas)
98
+
99
+
100
# Load the combined qa csv file
csv_file = 'quality_assurance_fleiss.csv'
df = pd.read_csv(csv_file)

# Parse the 'vertices' column and apply the supporting-contact correction.
# NOTE(security): eval() executes whatever expression each cell holds; only
# run this on a trusted csv.
vertices = correct_supporting([eval(v) for v in df['vertices'].values])

# Mean Fleiss' kappa over all images
kappa = fleiss_kappa(vertices)

print('Fleiss\' Kappa:', kappa)
hot_analysis/damon_qc_stats/qa_accuracy_gt_contact_combined.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b34393328dae058aa12245f547ee954f48508bf0c4ed7ecbe40a674245ba5717
3
+ size 456742
hot_analysis/damon_qc_stats/quality_assurance_accuracy.csv ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/damon_qc_stats/quality_assurance_fleiss.csv ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/damon_qc_stats/successful_qualifications_fleiss.csv ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/filtered_data/v_1/hico/hico_imglist_all_140223.txt ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/filtered_data/v_1/hico/image_per_object_category.png ADDED
hot_analysis/filtered_data/v_1/hico/imgnames_per_object_dict.json ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/filtered_data/v_1/hico/imgnames_per_object_dict.txt ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/filtered_data/v_1/hico/object_per_image_dict.json ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/filtered_data/v_1/hico/object_per_image_dict.txt ADDED
The diff for this file is too large to render. See raw diff
 
hot_analysis/filtered_data/v_1/hico_imglist_all_140223.txt ADDED
The diff for this file is too large to render. See raw diff