Chao Xu
committed on
Commit
·
854f0d0
1
Parent(s):
1fae98d
sparseneus and elev est
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +2 -1
- SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf +137 -0
- SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf +137 -0
- SparseNeuS_demo_v1/data/__init__.py +0 -0
- SparseNeuS_demo_v1/data/blender.py +340 -0
- SparseNeuS_demo_v1/data/blender_general.py +432 -0
- SparseNeuS_demo_v1/data/blender_general_12_narrow.py +427 -0
- SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py +427 -0
- SparseNeuS_demo_v1/data/blender_general_360.py +412 -0
- SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py +406 -0
- SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py +411 -0
- SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py +480 -0
- SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py +476 -0
- SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py +449 -0
- SparseNeuS_demo_v1/data/blender_general_8_2_stage.py +396 -0
- SparseNeuS_demo_v1/data/blender_general_8_4_gt.py +396 -0
- SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py +446 -0
- SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py +439 -0
- SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py +470 -0
- SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py +395 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py +418 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_6.py +399 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py +393 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py +395 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py +432 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all.py +386 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py +410 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py +411 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py +418 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py +414 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py +465 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py +419 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py +420 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py +428 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py +420 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py +417 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py +388 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py +389 -0
- SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py +395 -0
- SparseNeuS_demo_v1/data/blender_gt_32.py +419 -0
- SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt +93 -0
- SparseNeuS_demo_v1/data/dtu/lists/test.txt +15 -0
- SparseNeuS_demo_v1/data/dtu/lists/train.txt +75 -0
- SparseNeuS_demo_v1/data/dtu_fit.py +278 -0
- SparseNeuS_demo_v1/data/dtu_general.py +376 -0
- SparseNeuS_demo_v1/data/scene.py +102 -0
- SparseNeuS_demo_v1/evaluation/__init__.py +0 -0
- SparseNeuS_demo_v1/evaluation/clean_mesh.py +283 -0
- SparseNeuS_demo_v1/evaluation/eval_dtu_python.py +369 -0
- SparseNeuS_demo_v1/exp/lod0/checkpoint_trash/ckpt_285000.pth +3 -0
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
__pycache__/
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
*.DS_Store
|
SparseNeuS_demo_v1/confs/blender_general_lod1_val_new.conf
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# - for the lod1 geometry network, using adaptive cost for sparse cost regularization network
|
2 |
+
#- for lod1 rendering network, using depth-adaptive render
|
3 |
+
|
4 |
+
general {
|
5 |
+
base_exp_dir = ./exp/val/1_4_only_narrow_lod1
|
6 |
+
|
7 |
+
recording = [
|
8 |
+
./,
|
9 |
+
./data
|
10 |
+
./ops
|
11 |
+
./models
|
12 |
+
./loss
|
13 |
+
]
|
14 |
+
}
|
15 |
+
|
16 |
+
dataset {
|
17 |
+
# local path
|
18 |
+
trainpath = /objaverse-processed/zero12345_img/eval_selected
|
19 |
+
valpath = /objaverse-processed/zero12345_img/eval_selected
|
20 |
+
testpath = /objaverse-processed/zero12345_img/eval_selected
|
21 |
+
# trainpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
|
22 |
+
# valpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
|
23 |
+
# testpath = /objaverse-processed/zero12345_img/zero12345_2stage_5pred_sample/
|
24 |
+
imgScale_train = 1.0
|
25 |
+
imgScale_test = 1.0
|
26 |
+
nviews = 5
|
27 |
+
clean_image = True
|
28 |
+
importance_sample = True
|
29 |
+
test_ref_views = [23]
|
30 |
+
|
31 |
+
# test dataset
|
32 |
+
test_n_views = 2
|
33 |
+
test_img_wh = [256, 256]
|
34 |
+
test_clip_wh = [0, 0]
|
35 |
+
test_scan_id = scan110
|
36 |
+
train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] #
|
37 |
+
test_img_idx = [51, 55, 57] #[32, 33, 34] #
|
38 |
+
|
39 |
+
test_dir_comment = train
|
40 |
+
}
|
41 |
+
|
42 |
+
train {
|
43 |
+
learning_rate = 2e-4
|
44 |
+
learning_rate_milestone = [100000, 150000, 200000]
|
45 |
+
learning_rate_factor = 0.5
|
46 |
+
end_iter = 200000
|
47 |
+
save_freq = 5000
|
48 |
+
val_freq = 1
|
49 |
+
val_mesh_freq =1
|
50 |
+
report_freq = 100
|
51 |
+
|
52 |
+
N_rays = 512
|
53 |
+
|
54 |
+
validate_resolution_level = 4
|
55 |
+
anneal_start = 0
|
56 |
+
anneal_end = 25000
|
57 |
+
anneal_start_lod1 = 0
|
58 |
+
anneal_end_lod1 = 15000
|
59 |
+
|
60 |
+
use_white_bkgd = True
|
61 |
+
|
62 |
+
# Loss
|
63 |
+
# ! for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization
|
64 |
+
sdf_igr_weight = 0.1
|
65 |
+
sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network
|
66 |
+
sdf_decay_param = 100 # must not be too large; it determines the TSDF range
|
67 |
+
fg_bg_weight = 0.01 # first 0.01
|
68 |
+
bg_ratio = 0.3
|
69 |
+
|
70 |
+
if_fix_lod0_networks = True
|
71 |
+
}
|
72 |
+
|
73 |
+
model {
|
74 |
+
num_lods = 2
|
75 |
+
|
76 |
+
sdf_network_lod0 {
|
77 |
+
lod = 0,
|
78 |
+
ch_in = 56, # the channel num of fused pyramid features
|
79 |
+
voxel_size = 0.02105263, # 0.02083333, should be 2/95
|
80 |
+
vol_dims = [96, 96, 96],
|
81 |
+
hidden_dim = 128,
|
82 |
+
cost_type = variance_mean
|
83 |
+
d_pyramid_feature_compress = 16,
|
84 |
+
regnet_d_out = 16,
|
85 |
+
num_sdf_layers = 4,
|
86 |
+
# position embedding
|
87 |
+
multires = 6
|
88 |
+
}
|
89 |
+
|
90 |
+
|
91 |
+
sdf_network_lod1 {
|
92 |
+
lod = 1,
|
93 |
+
ch_in = 56, # the channel num of fused pyramid features
|
94 |
+
voxel_size = 0.0104712, #0.01041667, should be 2/191
|
95 |
+
vol_dims = [192, 192, 192],
|
96 |
+
hidden_dim = 128,
|
97 |
+
cost_type = variance_mean
|
98 |
+
d_pyramid_feature_compress = 8,
|
99 |
+
regnet_d_out = 8,
|
100 |
+
num_sdf_layers = 4,
|
101 |
+
# position embedding
|
102 |
+
multires = 6
|
103 |
+
}
|
104 |
+
|
105 |
+
|
106 |
+
variance_network {
|
107 |
+
init_val = 0.2
|
108 |
+
}
|
109 |
+
|
110 |
+
variance_network_lod1 {
|
111 |
+
init_val = 0.2
|
112 |
+
}
|
113 |
+
|
114 |
+
rendering_network {
|
115 |
+
in_geometry_feat_ch = 16
|
116 |
+
in_rendering_feat_ch = 56
|
117 |
+
anti_alias_pooling = True
|
118 |
+
}
|
119 |
+
|
120 |
+
rendering_network_lod1 {
|
121 |
+
in_geometry_feat_ch = 8
|
122 |
+
in_rendering_feat_ch = 56
|
123 |
+
anti_alias_pooling = True
|
124 |
+
|
125 |
+
}
|
126 |
+
|
127 |
+
|
128 |
+
trainer {
|
129 |
+
n_samples_lod0 = 64
|
130 |
+
n_importance_lod0 = 64
|
131 |
+
n_samples_lod1 = 64
|
132 |
+
n_importance_lod1 = 64
|
133 |
+
n_outside = 0 # 128 if render_outside_uniform_sampling
|
134 |
+
perturb = 1.0
|
135 |
+
alpha_type = div
|
136 |
+
}
|
137 |
+
}
|
SparseNeuS_demo_v1/confs/one2345_lod0_val_demo.conf
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# - for the lod1 geometry network, using adaptive cost for sparse cost regularization network
|
2 |
+
#- for lod1 rendering network, using depth-adaptive render
|
3 |
+
|
4 |
+
general {
|
5 |
+
|
6 |
+
base_exp_dir = exp/lod0 # !!! where you store the results and checkpoints to be used
|
7 |
+
recording = [
|
8 |
+
./,
|
9 |
+
./data
|
10 |
+
./ops
|
11 |
+
./models
|
12 |
+
./loss
|
13 |
+
]
|
14 |
+
}
|
15 |
+
|
16 |
+
dataset {
|
17 |
+
trainpath = ../
|
18 |
+
valpath = ../ # !!! where you store the validation data
|
19 |
+
testpath = ../
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
imgScale_train = 1.0
|
24 |
+
imgScale_test = 1.0
|
25 |
+
nviews = 5
|
26 |
+
clean_image = True
|
27 |
+
importance_sample = True
|
28 |
+
test_ref_views = [23]
|
29 |
+
|
30 |
+
# test dataset
|
31 |
+
test_n_views = 2
|
32 |
+
test_img_wh = [256, 256]
|
33 |
+
test_clip_wh = [0, 0]
|
34 |
+
test_scan_id = scan110
|
35 |
+
train_img_idx = [49, 50, 52, 53, 54, 56, 58] #[21, 22, 23, 24, 25] #
|
36 |
+
test_img_idx = [51, 55, 57] #[32, 33, 34] #
|
37 |
+
|
38 |
+
test_dir_comment = train
|
39 |
+
}
|
40 |
+
|
41 |
+
train {
|
42 |
+
learning_rate = 2e-4
|
43 |
+
learning_rate_milestone = [100000, 150000, 200000]
|
44 |
+
learning_rate_factor = 0.5
|
45 |
+
end_iter = 200000
|
46 |
+
save_freq = 5000
|
47 |
+
val_freq = 1
|
48 |
+
val_mesh_freq = 1
|
49 |
+
report_freq = 100
|
50 |
+
|
51 |
+
N_rays = 512
|
52 |
+
|
53 |
+
validate_resolution_level = 4
|
54 |
+
anneal_start = 0
|
55 |
+
anneal_end = 25000
|
56 |
+
anneal_start_lod1 = 0
|
57 |
+
anneal_end_lod1 = 15000
|
58 |
+
|
59 |
+
use_white_bkgd = True
|
60 |
+
|
61 |
+
# Loss
|
62 |
+
# ! for training the lod1 network, don't use this regularization in first 10k steps; then use the regularization
|
63 |
+
sdf_igr_weight = 0.1
|
64 |
+
sdf_sparse_weight = 0.02 # 0.002 for lod1 network; 0.02 for lod0 network
|
65 |
+
sdf_decay_param = 100 # must not be too large; it determines the TSDF range
|
66 |
+
fg_bg_weight = 0.01 # first 0.01
|
67 |
+
bg_ratio = 0.3
|
68 |
+
|
69 |
+
if_fix_lod0_networks = False
|
70 |
+
}
|
71 |
+
|
72 |
+
model {
|
73 |
+
num_lods = 1
|
74 |
+
|
75 |
+
sdf_network_lod0 {
|
76 |
+
lod = 0,
|
77 |
+
ch_in = 56, # the channel num of fused pyramid features
|
78 |
+
voxel_size = 0.02105263, # 0.02083333, should be 2/95
|
79 |
+
vol_dims = [96, 96, 96],
|
80 |
+
hidden_dim = 128,
|
81 |
+
cost_type = variance_mean
|
82 |
+
d_pyramid_feature_compress = 16,
|
83 |
+
regnet_d_out = 16,
|
84 |
+
num_sdf_layers = 4,
|
85 |
+
# position embedding
|
86 |
+
multires = 6
|
87 |
+
}
|
88 |
+
|
89 |
+
|
90 |
+
sdf_network_lod1 {
|
91 |
+
lod = 1,
|
92 |
+
ch_in = 56, # the channel num of fused pyramid features
|
93 |
+
voxel_size = 0.0104712, #0.01041667, should be 2/191
|
94 |
+
vol_dims = [192, 192, 192],
|
95 |
+
hidden_dim = 128,
|
96 |
+
cost_type = variance_mean
|
97 |
+
d_pyramid_feature_compress = 8,
|
98 |
+
regnet_d_out = 16,
|
99 |
+
num_sdf_layers = 4,
|
100 |
+
|
101 |
+
# position embedding
|
102 |
+
multires = 6
|
103 |
+
}
|
104 |
+
|
105 |
+
|
106 |
+
variance_network {
|
107 |
+
init_val = 0.2
|
108 |
+
}
|
109 |
+
|
110 |
+
variance_network_lod1 {
|
111 |
+
init_val = 0.2
|
112 |
+
}
|
113 |
+
|
114 |
+
rendering_network {
|
115 |
+
in_geometry_feat_ch = 16
|
116 |
+
in_rendering_feat_ch = 56
|
117 |
+
anti_alias_pooling = True
|
118 |
+
}
|
119 |
+
|
120 |
+
rendering_network_lod1 {
|
121 |
+
in_geometry_feat_ch = 16 # default 8
|
122 |
+
in_rendering_feat_ch = 56
|
123 |
+
anti_alias_pooling = True
|
124 |
+
|
125 |
+
}
|
126 |
+
|
127 |
+
|
128 |
+
trainer {
|
129 |
+
n_samples_lod0 = 64
|
130 |
+
n_importance_lod0 = 64
|
131 |
+
n_samples_lod1 = 64
|
132 |
+
n_importance_lod1 = 64
|
133 |
+
n_outside = 0 # 128 if render_outside_uniform_sampling
|
134 |
+
perturb = 1.0
|
135 |
+
alpha_type = div
|
136 |
+
}
|
137 |
+
}
|
SparseNeuS_demo_v1/data/__init__.py
ADDED
File without changes
|
SparseNeuS_demo_v1/data/blender.py
ADDED
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch.utils.data import Dataset
|
3 |
+
import json
|
4 |
+
import numpy as np
|
5 |
+
import os
|
6 |
+
from PIL import Image
|
7 |
+
from torchvision import transforms as T
|
8 |
+
from kornia import create_meshgrid
|
9 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
10 |
+
import cv2 as cv
|
11 |
+
from data.scene import get_boundingbox
|
12 |
+
|
13 |
+
|
14 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions of a pinhole camera, in camera coordinates.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] scales the first grid channel and
               focal[1] the second (presumably fx and fy)
        center: optional indexable pair used as the principal point;
                defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), ray directions in camera coordinate with the last
                    component fixed to 1 (the vectors are not normalized)
    """
    # No +0.5 pixel-centre offset is applied here, as calibration is not so accurate;
    # see https://github.com/bmild/nerf/issues/24
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0]
    xs, ys = pixel_grid.unbind(-1)

    if center is None:
        principal = [W / 2, H / 2]
    else:
        principal = center

    x_dir = (xs - principal[0]) / focal[0]
    y_dir = (ys - principal[1]) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
32 |
+
|
33 |
+
def get_rays(directions, c2w):
    """
    Express per-pixel camera rays in world coordinates for one image.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        directions: (H, W, 3) precomputed ray directions in camera coordinate
        c2w: transformation matrix from camera coordinate to world coordinate;
             only its top-left 3x3 block and the first three rows of column 3 are read
    Outputs:
        rays_o: (H*W, 3), the origin of the rays in world coordinate
        rays_d: (H*W, 3), the direction of the rays in world coordinate
                (NOT normalized: the normalization step is disabled)
    """
    rotation = c2w[:3, :3]
    camera_center = c2w[:3, 3]

    # Rotate the camera-space directions into the world frame.
    world_dirs = directions @ rotation.T  # (H, W, 3)
    # Every ray starts at the camera centre, broadcast to one origin per pixel.
    world_origins = camera_center.expand(world_dirs.shape)  # (H, W, 3)

    # Flatten the image grid into a list of rays; origins first, then directions.
    return world_origins.view(-1, 3), world_dirs.view(-1, 3)
|
55 |
+
|
56 |
+
|
57 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix; only read when
                  P is None. Values are separated by single spaces, and if the file
                  has exactly 4 lines the first one is skipped.
        P: optional projection matrix handed to cv.decomposeProjectionMatrix
           (callers in this file pass a 3x4 array)
    Outputs:
        intrinsics: (4, 4) identity matrix whose top-left 3x3 block is K,
                    scaled so that K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # NOTE(review): per the OpenCV docs the returned rotation appears to be the
    # world-to-camera rotation, so its transpose (= inverse) is the cam2world
    # rotation stored here - confirm against the OpenCV reference.
    pose[:3, :3] = R.transpose()
    # t is homogeneous (4x1); dividing by its last entry gives the 3D translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
79 |
+
|
80 |
+
|
81 |
+
class BlenderDataset(Dataset):
    """
    Dataset over a single scene described by ``transforms_{split}.json`` under ``root_dir``.

    Every frame listed in the json is loaded eagerly at construction time. Camera
    extrinsics are re-expressed relative to the first training view
    (``train_img_idx[0]``) and then rescaled by ``scale_mat`` so the region of
    interest is treated as the box [-1, 1]^3 (``bbox_min`` / ``bbox_max``).

    ``train_img_idx`` must be non-empty: its first entry selects the reference view.
    """

    def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=None, test_img_idx=None,
                 img_wh=None, clip_wh=None, original_img_wh=None,
                 N_rays=512, h_patch_size=5, near=2.0, far=6.0):
        """
        Inputs:
            root_dir: directory containing transforms_{split}.json and the images
            split: 'train', 'val' or 'test'; selects the json file and the ray sampling mode
            scan_id: accepted for interface compatibility; not used by this class
            n_views: number of training views cycled through in the 'train' split
            train_img_idx: frame indices used as training/support views (default: [])
            test_img_idx: frame indices rendered in non-train splits (default: [])
            img_wh: [width, height] the images are resized to (default: [800, 800])
            clip_wh: stored on the instance only (default: [0, 0])
            original_img_wh: accepted for interface compatibility; not used
                             (default: [800, 800])
            N_rays: number of random rays sampled per item in the 'train' split
            h_patch_size: forwarded to the random ray sampler (patch extraction for supervision)
            near, far: raw depth bounds shared by all frames
        """
        # None sentinels replace the former mutable list defaults; the effective
        # default values are unchanged.
        train_img_idx = [] if train_img_idx is None else train_img_idx
        test_img_idx = [] if test_img_idx is None else test_img_idx
        img_wh = [800, 800] if img_wh is None else img_wh
        clip_wh = [0, 0] if clip_wh is None else clip_wh
        original_img_wh = [800, 800] if original_img_wh is None else original_img_wh

        self.root_dir = root_dir
        self.split = split
        self.img_wh = img_wh
        self.clip_wh = clip_wh
        self.define_transforms()
        self.train_img_idx = train_img_idx
        self.test_img_idx = test_img_idx
        self.N_rays = N_rays
        self.h_patch_size = h_patch_size  # used to extract patch for supervision
        self.n_views = n_views
        self.near, self.far = near, far
        # Flips the y and z camera axes (right-multiplied onto every pose).
        self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])

        with open(os.path.join(self.root_dir, f"transforms_{self.split}.json"), 'r') as f:
            self.meta = json.load(f)

        self.read_meta(near, far)
        # One identical [near, far] row per frame.
        self.raw_near_fars = np.stack([np.array([self.near, self.far]) for _ in range(len(self.meta['frames']))])

        # ! estimate scale_mat from the training views only
        self.scale_mat, self.scale_factor = self.cal_scale_mat(
            img_hw=[self.img_wh[1], self.img_wh[0]],
            intrinsics=self.all_intrinsics[self.train_img_idx],
            extrinsics=self.all_w2cs[self.train_img_idx],
            near_fars=self.raw_near_fars[self.train_img_idx],
            factor=1.1)

        # * after scaling and translation, unit bounding box
        self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws, \
            self.scaled_affine_mats, self.scaled_near_fars = self.scale_cam_info()

        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])
        self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
        self.white_back = True

    def read_meta(self, near=2.0, far=6.0):
        """
        Load all frames of ``self.meta``: images, masks, intrinsics and extrinsics.

        Populates ``all_images`` (float32 tensor), ``all_masks`` (list of boolean
        tensors), ``all_intrinsics`` and ``all_w2cs`` (float32 tensors), plus the
        reference-view matrices, focal length, bounds and ray directions.
        """
        # The first training view defines the reference coordinate frame.
        self.ref_img_idx = self.train_img_idx[0]
        ref_c2w = np.array(self.meta['frames'][self.ref_img_idx]['transform_matrix']) @ self.blender2opencv
        self.ref_c2w = ref_c2w
        self.ref_w2c = np.linalg.inv(ref_c2w)

        w, h = self.img_wh
        # Focal length at width 800, then rescaled to the target width; the 800
        # cancels out, so the result is 0.5 * w / tan(0.5 * camera_angle_x).
        self.focal = 0.5 * 800 / np.tan(0.5 * self.meta['camera_angle_x'])  # original focal length
        self.focal *= self.img_wh[0] / 800  # modify focal length to match size self.img_wh

        # bounds, common for all scenes
        self.near = near
        self.far = far
        self.bounds = np.array([self.near, self.far])

        # ray directions for all pixels, same for all images (same H, W, focal)
        self.directions = get_ray_directions(h, w, [self.focal, self.focal])  # (h, w, 3)
        intrinsics = np.eye(4)
        intrinsics[:3, :3] = np.array([[self.focal, 0, w / 2], [0, self.focal, h / 2], [0, 0, 1]]).astype(np.float32)
        self.intrinsics = intrinsics

        self.image_paths = []
        self.poses = []
        self.all_rays = []
        self.all_images = []
        self.all_masks = []
        self.all_w2cs = []
        self.all_intrinsics = []

        # Loop-invariant: maps reference-camera coordinates back to the json world frame.
        ref_w2c_inv = np.linalg.inv(self.ref_w2c)

        for frame in self.meta['frames']:
            pose = np.array(frame['transform_matrix']) @ self.blender2opencv
            self.poses += [pose]
            # NOTE(review): the inverse is taken of the float32 tensor copy of the
            # pose; kept exactly as before so numerical results do not change.
            c2w = torch.FloatTensor(pose)
            w2c = np.linalg.inv(c2w)
            image_path = os.path.join(self.root_dir, f"{frame['file_path']}.png")
            self.image_paths += [image_path]
            # Close the file handle as soon as the resized copy exists.
            with Image.open(image_path) as raw_img:
                img = raw_img.resize(self.img_wh, Image.LANCZOS)
            img = self.transform(img)  # (4, h, w) - assumes RGBA input, TODO confirm

            # Last channel (alpha) > 0 marks foreground.
            self.all_masks += [img[-1:, :] > 0]
            # Multiply colour by alpha (no white-background blending is applied here).
            img = img[:3, :] * img[-1:, :]
            img = img.numpy()  # (3, h, w)
            self.all_images += [img]

            self.all_intrinsics.append(self.intrinsics)
            # - transform from world system to ref-camera system
            self.all_w2cs.append(w2c @ ref_w2c_inv)

        self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32)
        self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32)
        self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32)

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the scale/translation matrix from the bounding sphere of the given views.

        Returns:
            scale_mat: (4, 4) float32 matrix with ``radius * factor`` on the first three
                       diagonal entries and the bounding-box center in the last column
            scale_factor: ``1 / (radius * factor)`` as a numpy value
        """
        center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def scale_cam_info(self):
        """
        Re-derive per-frame camera matrices after applying ``self.scale_mat``.

        Returns float32 tensors, one entry per loaded image:
            intrinsics, w2cs, c2ws, affine matrices (K @ w2c embedded in a 4x4),
            and [near, far] bounds.
        """
        new_intrinsics = []
        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        for idx in range(len(self.all_images)):
            intrinsics = self.all_intrinsics[idx]
            P = intrinsics @ self.all_w2cs[idx] @ self.scale_mat
            P = P.cpu().numpy()[:3, :4]

            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            new_intrinsics.append(intrinsics)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Bounds are the camera's distance to the origin -/+ 1 (the scaled scene
            # radius), widened by 5% on each side.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

        new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars = \
            np.stack(new_intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), \
            np.stack(new_affine_mats), np.stack(new_near_fars)

        new_intrinsics = torch.from_numpy(np.float32(new_intrinsics))
        new_w2cs = torch.from_numpy(np.float32(new_w2cs))
        new_c2ws = torch.from_numpy(np.float32(new_c2ws))
        new_affine_mats = torch.from_numpy(np.float32(new_affine_mats))
        new_near_fars = torch.from_numpy(np.float32(new_near_fars))

        return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars

    def load_poses_all(self, file="transforms_train.json"):
        """Return the stacked cam2world poses (after the blender2opencv flip) listed in ``file``."""
        with open(os.path.join(self.root_dir, file), 'r') as f:
            meta = json.load(f)

        c2ws = [np.array(frame['transform_matrix']) @ self.blender2opencv for frame in meta['frames']]
        return np.stack(c2ws)

    def define_transforms(self):
        """Set the PIL-image-to-tensor transform used when loading frames."""
        self.transform = T.ToTensor()

    def get_conditional_sample(self):
        """Return a dict with the images and scaled camera data of all training views."""
        sample = {}
        support_idxs = self.train_img_idx

        sample['images'] = self.all_images[support_idxs]  # (V, 3, H, W)
        sample['w2cs'] = self.scaled_w2cs[self.train_img_idx]  # (V, 4, 4)
        sample['c2ws'] = self.scaled_c2ws[self.train_img_idx]  # (V, 4, 4)
        sample['near_fars'] = self.scaled_near_fars[self.train_img_idx]  # (V, 2)
        sample['intrinsics'] = self.scaled_intrinsics[self.train_img_idx][:, :3, :3]  # (V, 3, 3)
        sample['affine_mats'] = self.scaled_affine_mats[self.train_img_idx]  # ! in world space

        sample['scale_factor'] = torch.tensor(self.scale_factor)
        sample['scale_mat'] = torch.from_numpy(self.scale_mat)
        sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
        sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
        # Copy of the stored tensor; avoids torch.tensor(<Tensor>), which warns.
        sample['partial_vol_origin'] = self.partial_vol_origin.detach().clone().to(torch.float32)

        return sample

    def __len__(self):
        """Nominal epoch length: 1000 items per training view (train) or per test view (otherwise)."""
        if self.split == 'train':
            return self.n_views * 1000
        else:
            return len(self.test_img_idx) * 1000

    def __getitem__(self, idx):
        """
        Return one sample dict: support views, the query view and its rays.

        In the 'train' split the query cycles through the first ``n_views`` training
        indices and the remaining training views are the support set; random rays are
        sampled. In other splits the query cycles through ``test_img_idx``, is its own
        support view, and rays are generated for the full image when the split is
        'val' or 'test'.
        """
        sample = {}

        if self.split == 'train':
            render_idx = self.train_img_idx[idx % self.n_views]
            support_idxs = [view for view in self.train_img_idx if view != render_idx]
        else:
            render_idx = self.test_img_idx[idx % len(self.test_img_idx)]
            support_idxs = [render_idx]

        sample['images'] = self.all_images[support_idxs]  # (V, 3, H, W)
        sample['w2cs'] = self.scaled_w2cs[support_idxs]  # (V, 4, 4)
        sample['c2ws'] = self.scaled_c2ws[support_idxs]  # (V, 4, 4)
        sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3]  # (V, 3, 3)
        sample['affine_mats'] = self.scaled_affine_mats[support_idxs]  # ! in world space
        sample['scale_factor'] = torch.tensor(self.scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
        # Copy of the stored tensor; avoids torch.tensor(<Tensor>), which warns.
        sample['partial_vol_origin'] = self.partial_vol_origin.detach().clone().to(torch.float32)
        sample['img_index'] = torch.tensor(render_idx)

        # - query image
        sample['query_image'] = self.all_images[render_idx]
        sample['query_c2w'] = self.scaled_c2ws[render_idx]
        sample['query_w2c'] = self.scaled_w2cs[render_idx]
        sample['query_intrinsic'] = self.scaled_intrinsics[render_idx]
        sample['query_near_far'] = self.scaled_near_fars[render_idx]
        sample['scale_mat'] = torch.from_numpy(self.scale_mat)
        sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
        sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx]
        sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32))

        # - generate rays
        if self.split == 'val' or self.split == 'test':
            sample_rays = gen_rays_from_single_image(
                self.img_wh[1], self.img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=None,
                mask=None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                self.img_wh[1], self.img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=None,
                mask=None,
                dilated_mask=None,
                importance_sample=False,
                h_patch_size=self.h_patch_size
            )

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general.py
ADDED
@@ -0,0 +1,432 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: focal length in pixels; either a pair ``(fx, fy)`` or a single
            scalar that is used for both axes
        center: optional principal point ``(cx, cy)``; defaults to the image
            centre ``(W / 2, H / 2)``

    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
            The z component is always 1, so the vectors are NOT unit length.
    """
    # A scalar focal length means fx == fy.
    if np.ndim(focal) == 0:
        fx = fy = focal
    else:
        fx, fy = focal[0], focal[1]

    # Pixel coordinates sampled at pixel centres (hence the +0.5 offset).
    dtype = torch.get_default_dtype()
    xs = torch.arange(W, dtype=dtype) + 0.5
    ys = torch.arange(H, dtype=dtype) + 0.5
    i = xs.unsqueeze(0).expand(H, W)  # x / column coordinate, (H, W)
    j = ys.unsqueeze(1).expand(H, W)  # y / row coordinate, (H, W)

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack(
        [(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1
    )  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines the first one is skipped.
        P: optional projection matrix; when given, ``filename`` is ignored.

    Outputs:
        intrinsics: (4, 4) float64 matrix whose top-left 3x3 block is K
            normalised so that K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        # Keep only the first four space-separated tokens of every row.
        rows = [tokens[:4] if len(tokens) >= 4 else [tokens[0], tokens[1], tokens[2], tokens[3]]
                for tokens in (line.split(" ") for line in lines)]
        P = np.asarray(rows).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # NOTE(review): R is presumably the world-to-camera rotation, so its
    # transpose is the camera-to-world rotation - confirm against OpenCV docs.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4-vector; de-homogenise it to get the 3D translation
    # column of the pose.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Dataset yielding one reference view plus its four source views.

    Every object listed in the LVIS split contributes 8 samples, one per
    reference view ``view_{k}.png`` (k in [0, 7]).  The source views of
    reference view k are ``view_{k}_{0..3}_10.png``; their poses are entries
    ``8 + 4 * k .. 8 + 4 * k + 3`` of the pose file.  All cameras of a sample
    are expressed relative to the reference camera and rescaled by the
    scale matrix estimated in ``cal_scale_mat``.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """Load the object list and the shared camera metadata.

        ``split_filepath`` and ``pair_filepath`` are accepted for interface
        compatibility but are not used.  ``img_wh`` is only validated; the
        working resolution is fixed to (256, 256).  The list defaults are
        never mutated here.
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # Stored because read_mask() resizes by this factor.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        # Every split other than 'train' uses the validation objects.
        self.lvis_paths = lvis_paths['train'] if self.split == 'train' else lvis_paths['val']

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])

        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        c2ws, w2cs = [], []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            c2ws.append(c2w)
            w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(c2ws, axis=0)
        self.w2cs = np.stack(w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> tensor transform used for every image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-pose intrinsics / extrinsics (w2c) / near-far lists."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return it binarised at quarter and full size.

        Returns ``(mask, mask_h)`` where ``mask_h`` is the image resized by
        ``self.downSample`` and ``mask`` is ``mask_h`` downscaled by 4.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Estimate the matrix that normalises the scene seen by the views.

        Returns ``(scale_mat, scale_factor)``: a float32 4x4 matrix with
        ``radius * factor`` on the diagonal and the bounding-box centre as
        translation, and the reciprocal of that scaled radius.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Number of samples: 8 reference views per object."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Read a 16-bit depth image and build a 3-level pyramid.

        The raw values are mapped linearly from [0, 65535] to [0.5, 1.9];
        depths below ``near_bound + 1e-3`` are zeroed.  ``noisy_factor`` is
        accepted but unused.

        Returns ``(depth, depth_h, depth_aug)``: the pyramid dict with keys
        ``level_0..level_2`` (each level half the size of the previous one),
        the full-resolution depth, and the ``level_2`` map which, for the
        'train' split, has a random central rectangle zeroed out.
        """
        depth_h = cv2.imread(filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 65535 * 1.4 + 0.5

        depth_h[depth_h < near_bound + 1e-3] = 0.0

        depth = {}
        for level in range(3):
            depth[f"level_{level}"] = cv2.resize(
                depth_h,
                None,
                fx=1.0 / (2 ** level),
                fy=1.0 / (2 ** level),
                interpolation=cv2.INTER_NEAREST,
            )

        if self.split == "train":
            cutout = np.ones_like(depth["level_2"])
            rows, cols = cutout.shape[0], cutout.shape[1]
            # Index the size-1 draws explicitly: converting a 1-element array
            # with int() is deprecated in recent NumPy releases.
            h0 = int(np.random.randint(0, high=rows // 5, size=1)[0])
            h1 = int(np.random.randint(4 * rows // 5, high=rows, size=1)[0])
            w0 = int(np.random.randint(0, high=cols // 5, size=1)[0])
            w1 = int(np.random.randint(4 * cols // 5, high=cols, size=1)[0])
            cutout[h0:h1, w0:w1] = 0
            depth_aug = depth["level_2"] * cutout
        else:
            depth_aug = depth["level_2"].copy()

        return depth, depth_h, depth_aug

    def __getitem__(self, idx):
        """Build the sample for reference view ``idx % 8`` of object ``idx // 8``."""
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views
        img_wh = self.img_wh

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        # ---- reference (target) view ----
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # Every camera is expressed relative to the reference camera, so the
        # reference extrinsic itself is (numerically close to) the identity.
        w2cs.append(w2c @ w2c_ref_inv)

        img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png')

        img = self.transform(Image.open(img_filename))  # (4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        imgs += [img]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"could not read depth map: {depth_filename}")
        # NOTE(review): the "_depth_mm" suffix suggests millimetres - confirm.
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0

        # The ray directions have z == 1, so scaling them by the z-depth gives
        # the camera-space surface point; its norm is the distance along the ray.
        directions = get_ray_directions(
            self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]
        ).numpy()  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)
        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])

        image_perm = 0  # only supervised on reference view
        mask_dilated = None

        # ---- source views: four pose entries per reference view ----
        src_views = range(8 + idx * 4, 8 + (idx + 1) * 4)
        for vid in src_views:
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_{vid%4}_10.png')

            img = self.transform(Image.open(img_filename))
            if img.shape[0] == 4:
                img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

            imgs += [img]
            # Source views carry no ground-truth depth: placeholder ones.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        # ! calculate the new w2cs after scaling
        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near / far planes are placed one unit either side of the origin
            # as seen from the camera, then padded by 5%.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside training the reference view is excluded from the per-view
        # tensors; it stays available through the query_* entries below.
        start_idx = 0 if self.split == 'train' else 1

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_12_narrow.py
ADDED
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: focal length in pixels; either a pair ``(fx, fy)`` or a single
            scalar that is used for both axes
        center: optional principal point ``(cx, cy)``; defaults to the image
            centre ``(W / 2, H / 2)``

    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
            The z component is always 1, so the vectors are NOT unit length.
    """
    # A scalar focal length means fx == fy.
    if np.ndim(focal) == 0:
        fx = fy = focal
    else:
        fx, fy = focal[0], focal[1]

    # Pixel coordinates sampled at pixel centres (hence the +0.5 offset).
    dtype = torch.get_default_dtype()
    xs = torch.arange(W, dtype=dtype) + 0.5
    ys = torch.arange(H, dtype=dtype) + 0.5
    i = xs.unsqueeze(0).expand(H, W)  # x / column coordinate, (H, W)
    j = ys.unsqueeze(1).expand(H, W)  # y / row coordinate, (H, W)

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack(
        [(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1
    )  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines the first one is skipped.
        P: optional projection matrix; when given, ``filename`` is ignored.

    Outputs:
        intrinsics: (4, 4) float64 matrix whose top-left 3x3 block is K
            normalised so that K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        # Keep only the first four space-separated tokens of every row.
        rows = [tokens[:4] if len(tokens) >= 4 else [tokens[0], tokens[1], tokens[2], tokens[3]]
                for tokens in (line.split(" ") for line in lines)]
        P = np.asarray(rows).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # NOTE(review): R is presumably the world-to-camera rotation, so its
    # transpose is the camera-to-world rotation - confirm against OpenCV docs.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4-vector; de-homogenise it to get the 3D translation
    # column of the pose.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Dataset with 12 reference views per object (8 "narrow" + 4 two-stage).

    The pose table is the concatenation of the narrow pose file and the
    two-stage pose file.  Indexing in ``__getitem__`` relies on the layout
    noted in the original code, (8 + 8*4) + (4 + 4*4) entries: 0-7 narrow
    reference views, 8-39 their source views, 40-43 two-stage reference
    views, 44-59 their source views.
    NOTE(review): image locations are hard-coded absolute paths; ``root_dir``
    is stored but not used for loading.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """Load the object list and the shared camera metadata.

        ``split_filepath`` and ``pair_filepath`` are accepted for interface
        compatibility but are not used.  ``img_wh`` is only validated; the
        working resolution is fixed to (256, 256).  The list defaults are
        never mutated here.
        """
        self.root_dir = root_dir
        self.split = split
        self.imgs_per_instance = 12
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # Stored because read_mask() resizes by this factor.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        # Every split other than 'train' uses the validation objects.
        self.lvis_paths = lvis_paths['train'] if self.split == 'train' else lvis_paths['val']

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path_narrow_8, 'r') as f:
            narrow_meta = json.load(f)

        pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json"
        with open(pose_json_path_narrow_4, 'r') as f:
            two_stage_meta = json.load(f)

        # Narrow poses first, two-stage poses appended after them.
        self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())  # (8 + 8*4) + (4 + 4*4)
        self.img_wh = (256, 256)
        self.input_poses = np.array(
            list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values())
        )
        intrinsic = np.eye(4)
        assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
        self.intrinsic = intrinsic
        assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
        self.near_far = np.array(narrow_meta["near_far"])
        self.near_far[1] = 1.8  # the far bound from the pose file is overridden
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        c2ws, w2cs = [], []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            c2ws.append(c2w)
            w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(c2ws, axis=0)
        self.w2cs = np.stack(w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> tensor transform used for every image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-pose intrinsics / extrinsics (w2c) / near-far lists."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return it binarised at quarter and full size.

        Returns ``(mask, mask_h)`` where ``mask_h`` is the image resized by
        ``self.downSample`` and ``mask`` is ``mask_h`` downscaled by 4.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Estimate the matrix that normalises the scene seen by the views.

        Returns ``(scale_mat, scale_factor)``: a float32 4x4 matrix with
        ``radius * factor`` on the diagonal and the bounding-box centre as
        translation, and the reciprocal of that scaled radius.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Number of samples: ``imgs_per_instance`` reference views per object."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder: depth maps are read directly inside ``__getitem__``."""
        pass

    def __getitem__(self, idx):
        """Build the sample for reference view ``idx % 12`` of object ``idx // 12``."""
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views
        img_wh = self.img_wh

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        idx = idx % self.imgs_per_instance  # [0, 11]

        # ---- reference (target) view ----
        if idx < 8:
            # narrow reference view: pose entries 0-7
            ref_pose_idx = idx
            ref_dir = "/objaverse-processed/zero12345_img/zero12345_narrow/"
        else:
            # two-stage reference view: pose entries 40-43
            ref_pose_idx = idx - 8 + 40
            ref_dir = "/objaverse-processed/zero12345_img/zero12345_narrow_12/"

        c2w = self.c2ws[ref_pose_idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # Every camera is expressed relative to the reference camera, so the
        # reference extrinsic itself is (numerically close to) the identity.
        w2cs.append(w2c @ w2c_ref_inv)

        img_filename = os.path.join(ref_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(ref_dir, folder_id, uid, f'view_{idx}_depth_mm.png')

        img = self.transform(Image.open(img_filename))  # (4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        imgs += [img]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"could not read depth map: {depth_filename}")
        # NOTE(review): the "_depth_mm" suffix suggests millimetres - confirm.
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0

        # The ray directions have z == 1, so scaling them by the z-depth gives
        # the camera-space surface point; its norm is the distance along the ray.
        directions = get_ray_directions(
            self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]
        ).numpy()  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)
        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])

        image_perm = 0  # only supervised on reference view
        mask_dilated = None

        # ---- source views: every non-reference pose entry (8-39 and 44-59) ----
        src_views = range(8, 8 + 8 * 4 + 4 + 4 * 4)
        src_views_used = []
        skipped_idx = [40, 41, 42, 43]  # two-stage reference poses
        for vid in src_views:
            if vid in skipped_idx:
                continue

            src_views_used.append(vid)
            cur_view_id = (vid - 8) // 4  # 0-7 for narrow entries, 9-12 for two-stage entries

            if cur_view_id < 8:
                # choose narrow
                img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
            else:
                # choose 2-stage: undo the offset caused by the 4 skipped
                # entries so the file index becomes 8-11
                cur_view_id = cur_view_id - 1
                img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png')

            img = self.transform(Image.open(img_filename))
            if img.shape[0] == 4:
                img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

            imgs += [img]
            # Source views carry no ground-truth depth: placeholder ones.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near / far planes are placed one unit either side of the origin
            # as seen from the camera, then padded by 5%.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside training the reference view is excluded from the per-view
        # tensors; it stays available through the query_* entries below.
        start_idx = 0 if self.split == 'train' else 1

        view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        if view_ids[0] < 8:
            meta_end = "_narrow" + "_refview" + str(view_ids[0])
        else:
            meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
        # NOTE(review): "_refview" ends up in the string twice (here and in
        # meta_end); kept unchanged in case downstream code depends on it.
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_12_narrow_8.py
ADDED
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: pair (fx, fy) of focal lengths; a single scalar is also
            accepted and is then used for both axes
        center: optional pair (cx, cy); defaults to the image centre
            (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
            The last component is always 1 (the vectors are not normalised).
    """
    # create_meshgrid yields a 1xHxWx2 grid; [0] drops the leading axis and the
    # + 0.5 shifts every sample to the pixel centre.
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # HxWx2

    i, j = grid.unbind(-1)  # i pairs with cent[0] / fx, j with cent[1] / fy
    # Accept either a scalar focal length or an (fx, fy) pair.
    fx, fy = (focal, focal) if np.ndim(focal) == 0 else (focal[0], focal[1])
    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Split a 3x4 projection matrix into intrinsics and a camera pose.

    Args:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines, the first one is skipped.
        P: optional projection matrix; when given, ``filename`` is ignored.

    Returns:
        (intrinsics, pose): ``intrinsics`` is a 4x4 matrix whose upper-left
        3x3 block is K normalised so that K[2, 2] == 1; ``pose`` is a 4x4
        float32 cam2world matrix.
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is transposed and t is de-homogenised to fill the pose.
    # NOTE(review): presumably R is the world-to-camera rotation and t the
    # homogeneous camera position, as in OpenCV's documentation - confirm.
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Dataset yielding one reference view plus a fixed set of source views.

    Each item corresponds to one object (an entry of the split JSON) and one
    of its ``imgs_per_instance`` reference views. All camera matrices of an
    item are expressed relative to the reference camera, then rescaled by the
    matrix computed in ``cal_scale_mat``.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=None, batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=None):
        """Load the object split and the camera metadata from disk.

        Args:
            root_dir: stored on the instance; the data paths used below are
                hard-coded and do not depend on it.
            split: ``'train'`` selects the training objects, anything else
                the validation objects.
            n_views: stored on the instance.
            img_wh: only validated (both entries must be multiples of 32);
                the working resolution is fixed to (256, 256).
            downSample: resize factor read by ``read_mask``.
            split_filepath, pair_filepath: unused.
            N_rays: number of random rays sampled per item when training.
            vol_dims: cost-volume dimensions; defaults to [128, 128, 128].
            batch_size: stored on the instance.
            clean_image: when true, the reference mask is passed to the ray
                generators.
            importance_sample: forwarded to the random ray generator.
            test_ref_views: stored on the instance; defaults to a new empty
                list.
        """
        # None sentinels avoid sharing one mutable default between instances.
        vol_dims = [128, 128, 128] if vol_dims is None else vol_dims
        test_ref_views = [] if test_ref_views is None else test_ref_views

        self.root_dir = root_dir
        self.split = split
        self.imgs_per_instance = 8
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask reads self.downSample, so the argument must be kept.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/narrow_12_split_upd.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path_narrow_8 = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path_narrow_8, 'r') as f:
            narrow_8_meta = json.load(f)

        pose_json_path_narrow_4 = "/objaverse-processed/zero12345_img/zero12345_2stage_12_pose.json"
        with open(pose_json_path_narrow_4, 'r') as f:
            narrow_4_meta = json.load(f)

        # Poses of both metadata files are concatenated: first file first.
        self.img_ids = list(narrow_8_meta["c2ws"].keys()) + list(narrow_4_meta["c2ws"].keys())  # (8 + 8*4) + (4 + 4*4)
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(narrow_8_meta["c2ws"].values()) + list(narrow_4_meta["c2ws"].values()))
        intrinsic = np.eye(4)
        assert narrow_8_meta["intrinsics"] == narrow_4_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_8_meta["intrinsics"])
        self.intrinsic = intrinsic
        assert narrow_8_meta["near_far"] == narrow_4_meta["near_far"], "near_far not equal"
        self.near_far = np.array(narrow_8_meta["near_far"])
        self.near_far[1] = 1.8  # the far bound from the metadata is overridden
        self.define_transforms()
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)

        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the image-to-tensor transform stored in ``self.transform``."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, inverted c2w and near/far bounds for every view."""
        for vid, _ in enumerate(self.img_ids):
            intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
            self.all_intrinsics.append(intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(near_far)

    def read_mask(self, filename):
        """Read a mask image and return it binarised at two resolutions.

        Returns:
            (mask, mask_h): ``mask_h`` is the image resized by
            ``self.downSample``; ``mask`` is ``mask_h`` resized by 0.25.
            Every positive pixel is set to 1 in both.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the scale matrix from the bounding box of the given cameras.

        Returns:
            (scale_mat, inv_radius): a 4x4 float32 matrix with
            ``radius * factor`` on the first three diagonal entries and the
            box centre in the last column, and ``1 / (radius * factor)``.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Return the number of (object, reference view) pairs."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Unimplemented placeholder; always returns None.

        The class previously also carried an earlier one-argument
        ``read_depth`` stub that this definition replaced at class creation.
        """
        pass

    def __getitem__(self, idx):
        """Assemble the sample for flat index ``idx``.

        ``idx // imgs_per_instance`` selects the object and
        ``idx % imgs_per_instance`` the reference view. The returned dict
        holds the stacked images, depths, masks and camera matrices of the
        reference view followed by the source views, the ``query_*`` entries
        taken from the reference view, and the generated ``rays``. For
        non-train splits the reference view is dropped from the stacked
        entries (``start_idx = 1``).
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]

        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        idx = idx % self.imgs_per_instance  # [0, imgs_per_instance - 1]
        # With imgs_per_instance == 8 only the first branch is taken; the
        # second one serves reference views 8 and above.
        if idx < 8:
            c2w = self.c2ws[idx]
            view_dir = "/objaverse-processed/zero12345_img/zero12345_narrow/"
        else:
            c2w = self.c2ws[idx - 8 + 40]
            view_dir = "/objaverse-processed/zero12345_img/zero12345_narrow_12/"

        # target view: every pose below is expressed relative to this camera
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(view_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(view_dir, folder_id, uid, f'view_{idx}_depth_mm.png')

        img = Image.open(img_filename)
        img = self.transform(img)  # (4, h, w)

        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        imgs += [img]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals a missing or unreadable file by returning None.
            raise FileNotFoundError(f"could not read depth map: {depth_filename}")
        # presumably millimetres converted to metres (file suffix "_depth_mm") - TODO confirm
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        # Scale the per-pixel ray directions by the stored depth and take the
        # norm, i.e. replace the depth by the length of the scaled ray.
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        depth_h = distance

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsic = self.intrinsic
        intrinsics.append(intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        img_wh = self.img_wh
        src_views = range(8, 8 + 8 * 4 + 4 + 4 * 4)
        src_views_used = []
        skipped_idx = [40, 41, 42, 43]
        for vid in src_views:
            if vid in skipped_idx:
                continue

            src_views_used.append(vid)
            cur_view_id = (vid - 8) // 4  # [0, 7] for the narrow views

            # choose narrow
            if cur_view_id < 8:
                img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
            else:  # choose 2-stage
                cur_view_id = cur_view_id - 1
                img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow_12", folder_id, uid, f'view_{cur_view_id}_{vid%4}.png')

            img = Image.open(img_filename)

            img = self.transform(img)
            if img.shape[0] == 4:
                img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

            imgs += [img]
            # Source views get constant placeholder depth and mask.
            depth_h = np.ones(img.shape[1:], dtype=np.float32)
            depths_h.append(depth_h)
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far are re-derived from the camera's distance to the origin.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        # sample['light_idx'] = torch.tensor(light_idx)
        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        if view_ids[0] < 8:
            meta_end = "_narrow" + "_refview" + str(view_ids[0])
        else:
            meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # near_fars is intentionally left unsliced, as in the original code.
        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_360.py
ADDED
@@ -0,0 +1,412 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: pair (fx, fy) of focal lengths; a single scalar is also
            accepted and is then used for both axes
        center: optional pair (cx, cy); defaults to the image centre
            (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
            The last component is always 1 (the vectors are not normalised).
    """
    # create_meshgrid yields a 1xHxWx2 grid; [0] drops the leading axis and the
    # + 0.5 shifts every sample to the pixel centre.
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # HxWx2

    i, j = grid.unbind(-1)  # i pairs with cent[0] / fx, j with cent[1] / fy
    # Accept either a scalar focal length or an (fx, fy) pair.
    fx, fy = (focal, focal) if np.ndim(focal) == 0 else (focal[0], focal[1])
    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
37 |
+
|
38 |
+
def load_K_Rt_from_P(filename, P=None):
    """Split a 3x4 projection matrix into intrinsics and a camera pose.

    Args:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines, the first one is skipped.
        P: optional projection matrix; when given, ``filename`` is ignored.

    Returns:
        (intrinsics, pose): ``intrinsics`` is a 4x4 matrix whose upper-left
        3x3 block is K normalised so that K[2, 2] == 1; ``pose`` is a 4x4
        float32 cam2world matrix.
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is transposed and t is de-homogenised to fill the pose.
    # NOTE(review): presumably R is the world-to-camera rotation and t the
    # homogeneous camera position, as in OpenCV's documentation - confirm.
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
60 |
+
|
61 |
+
|
62 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
63 |
+
class BlenderPerView(Dataset):
|
64 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=None, batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=None):
    """Load the object split and the camera metadata from disk.

    Args:
        root_dir: stored on the instance.
        split: ``'train'`` selects the training objects, anything else the
            validation objects.
        n_views: stored on the instance.
        img_wh: only validated (both entries must be multiples of 32); the
            working resolution is fixed to (256, 256).
        downSample: resize factor read by ``read_depth`` / ``read_mask``.
        split_filepath, pair_filepath: unused.
        N_rays: stored on the instance.
        vol_dims: cost-volume dimensions; defaults to [128, 128, 128].
        batch_size: stored on the instance.
        clean_image, importance_sample: stored on the instance.
        test_ref_views: stored on the instance; defaults to a new empty list.
    """
    # None sentinels avoid sharing one mutable default between instances.
    vol_dims = [128, 128, 128] if vol_dims is None else vol_dims
    test_ref_views = [] if test_ref_views is None else test_ref_views

    # print("root_dir: ", root_dir)
    self.root_dir = root_dir
    self.split = split

    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # read_depth / read_mask read self.downSample, so the argument must be kept.
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path = "/objaverse-processed/zero12345_img/zero12345_wide_pose.json"
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)

    self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0_0", "view_0_5", "view_1_7"
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(meta["c2ws"].values()))
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])

    self.define_transforms()
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    # self.root_dir = root_dir
    for pose in self.input_poses:
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)
    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
141 |
+
|
142 |
+
|
143 |
+
def define_transforms(self):
    """Create the image preprocessing pipeline and store it in ``self.transform``."""
    to_tensor = T.ToTensor()
    self.transform = T.Compose([to_tensor])
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
+
def load_cam_info(self):
    """Fill the per-view camera lists.

    For every entry of ``self.img_ids`` this appends the shared intrinsic
    matrix, the inverse of that view's c2w matrix and the shared near/far
    bounds to ``all_intrinsics``, ``all_extrinsics`` and ``all_near_fars``.
    """
    for view_index, _ in enumerate(self.img_ids):
        world_to_cam = np.linalg.inv(self.c2ws[view_index])
        self.all_intrinsics.append(self.intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(self.near_far)
|
154 |
+
|
155 |
+
def read_depth(self, filename):
    """Load a PFM depth map and return it at two resolutions.

    The map is halved, cropped to rows 44:556 and columns 80:720, resized by
    ``self.downSample`` and finally reduced to a quarter of that size.
    Nearest-neighbour interpolation is used throughout.

    NOTE(review): a second ``read_depth`` with a different signature appears
    later in this file; if both sit in the same class, the later one replaces
    this definition.

    Returns:
        (depth, depth_h): the quarter-size map and the full-size map.
    """
    nearest = cv2.INTER_NEAREST
    raw = np.array(read_pfm(filename)[0], dtype=np.float32)  # (1200, 1600)
    halved = cv2.resize(raw, None, fx=0.5, fy=0.5, interpolation=nearest)  # (600, 800)
    cropped = halved[44:556, 80:720]  # (512, 640)
    depth_h = cv2.resize(cropped, None, fx=self.downSample, fy=self.downSample,
                         interpolation=nearest)
    depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4, interpolation=nearest)
    return depth, depth_h
|
166 |
+
|
167 |
+
def read_mask(self, filename):
    """Read a mask image and return it binarised at two resolutions.

    Returns:
        (mask, mask_h): ``mask_h`` is the image resized by
        ``self.downSample``; ``mask`` is ``mask_h`` resized by 0.25. Every
        positive pixel is set to 1 in both.
    """
    nearest = cv2.INTER_NEAREST
    loaded = cv2.imread(filename, 0)
    mask_h = cv2.resize(loaded, None, fx=self.downSample, fy=self.downSample,
                        interpolation=nearest)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, interpolation=nearest)

    # the masks stored in png are not binary
    for binary in (mask, mask_h):
        binary[binary > 0] = 1

    return mask, mask_h
|
178 |
+
|
179 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """Build the scale matrix from the bounding box of the given cameras.

    Returns:
        (scale_mat, inv_radius): a 4x4 float32 matrix with
        ``radius * factor`` on the first three diagonal entries and the box
        centre in the last column, and ``1 / (radius * factor)``.
    """
    center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    scaled_radius = radius * factor

    scale_mat = np.diag([scaled_radius] * 3 + [1.0])
    scale_mat[:3, 3] = center.cpu().numpy()

    inv_radius = 1. / scaled_radius.cpu().numpy()
    return scale_mat.astype(np.float32), inv_radius
|
192 |
+
|
193 |
+
def __len__(self):
|
194 |
+
return 36*len(self.lvis_paths)
|
195 |
+
|
196 |
+
|
197 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """Unimplemented placeholder: ignores its arguments and returns None."""
    return None
|
199 |
+
|
200 |
+
|
201 |
+
def __getitem__(self, idx):
    """Assemble one sample: a ground-truth target view plus five neighbouring source views.

    ``idx`` selects an object (``idx // 36``) and one of its 36 views
    (``idx % 36``), which are organised as 3 groups of 12. The target view's
    camera frame becomes the world frame, the scene is rescaled with
    ``cal_scale_mat`` and rays are generated for the target image.

    Args:
        idx: flat sample index in ``[0, len(self))``.

    Returns:
        dict with the stacked per-view tensors (``images``, ``depths_h``,
        ``masks_h``, ``w2cs``, ``c2ws``, ``near_fars``, ``intrinsics``,
        ``view_ids``, ``affine_mats``), the ``query_*`` entries for the target
        view, scene metadata (``scan``, ``meta``, ``scale_mat``,
        ``scale_factor``, ``trans_mat``, ``img_wh``, ``render_img_idx``,
        ``partial_vol_origin``) and the generated ``rays``.

    Raises:
        FileNotFoundError: if the target depth image cannot be read.
    """
    sample = {}
    img_wh = self.img_wh

    def load_rgb(path):
        # Convert to a tensor (channels first, per the original "(4, h, w)" note)
        # and composite RGBA onto a white background.
        with Image.open(path) as pil_img:
            tensor = self.transform(pil_img)
        if tensor.shape[0] == 4:
            tensor = tensor[:3] * tensor[-1:] + (1 - tensor[-1:])  # blend A to RGB
        return tensor

    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

    folder_uid_dict = self.lvis_paths[idx // 36]
    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']
    obj_dir = os.path.join(self.root_dir, folder_id, uid)

    idx = idx % 36  # [0, 35]
    gt_view_idx = idx // 12  # [0, 2]
    target_view_idx = idx % 12  # [0, 11]

    # ---- target view: its camera frame is used as the reference (world) frame
    c2w = self.c2ws[idx]
    w2c = np.linalg.inv(c2w)
    w2c_ref = w2c
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    img_filename = os.path.join(obj_dir, f'view_{gt_view_idx}_{target_view_idx}_gt.png')
    depth_filename = os.path.join(obj_dir, f'view_{gt_view_idx}_{target_view_idx}_gt_depth_mm.png')

    imgs.append(load_rgb(img_filename))

    depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
    if depth_raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read depth image: {depth_filename}')
    depth_h = depth_raw.astype(np.uint16) / 1000.0  # stored in millimetres (file name suffix "_mm")
    mask_h = depth_h > 0

    # Convert z-depth to distance along each pixel's ray.
    directions = get_ray_directions(img_wh[1], img_wh[0],
                                    [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
    surface_points = directions * depth_h[..., None]  # [H, W, 3]
    depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

    depths_h.append(depth_h)
    masks_h.append(mask_h)
    intrinsics.append(self.intrinsic)
    near_fars.append(self.near_fars[idx])

    image_perm = 0  # only supervised on reference view
    mask_dilated = None

    # ---- source views: the window [target - 2, target + 2], wrapped inside the same group of 12
    idx_of_12 = idx - 12 * gt_view_idx  # idx % 12
    src_views = [i % 12 + 12 * gt_view_idx for i in range(idx_of_12 - 2, idx_of_12 + 3)]

    for vid in src_views:
        # The image must belong to the same view as the camera taken from
        # ``self.all_*[vid]``; the original always loaded the target view's file.
        img = load_rgb(os.path.join(obj_dir, f'view_{gt_view_idx}_{vid % 12}.png'))
        imgs.append(img)

        # No depth supervision on source views: constant placeholders.
        depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])
        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    # ! estimate scale_mat
    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    # ! calculate the new w2cs after scaling
    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, _near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)

        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # Near/far bracket a unit sphere around the origin, with a 5% margin.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1
        new_near_fars.append([0.95 * near, 1.05 * far])

        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics = np.stack(intrinsics)
    w2cs = np.stack(new_w2cs)
    c2ws = np.stack(new_c2ws)
    near_fars = np.stack(new_near_fars)

    # Outside training the target view (index 0) is removed from the input views.
    start_idx = 0 if self.split == 'train' else 1

    view_ids = [idx] + list(src_views)

    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

    # - image to render
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    # near_fars is intentionally not sliced, matching the original behaviour.
    for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                'intrinsics', 'view_ids', 'affine_mats'):
        sample[key] = sample[key][start_idx:]

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    query_mask = sample['query_mask'] if self.clean_image else None
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=query_mask)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=query_mask,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_3.py
ADDED
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width
        focal: pair (fx, fy) of focal lengths, indexed as focal[0] and focal[1]
        center: optional pair (cx, cy); defaults to the image centre (W / 2, H / 2)

    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate,
                    with the last component fixed to 1 (not normalised)
    """
    if center is None:
        center = [W / 2, H / 2]

    # Pixel coordinates shifted by +0.5 so that each ray passes through the pixel centre.
    pixels = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2)
    u, v = pixels.unbind(-1)

    x = (u - center[0]) / focal[0]
    y = (v - center[1]) / focal[1]
    z = torch.ones_like(u)

    return torch.stack([x, y, z], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Args:
        filename: text file holding the projection matrix as space-separated
            rows; only read when ``P`` is None. If the file has exactly four
            lines the first one is skipped.
        P: optional 3x4 projection matrix; when given, ``filename`` is ignored.

    Returns:
        ``(intrinsics, pose)``: a 4x4 matrix whose upper-left 3x3 block is K
        normalised so that ``K[2, 2] == 1``, and a float32 4x4 camera-to-world
        matrix.
    """
    if P is None:
        # Read through a context manager so the file handle is closed promptly.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation; its transpose (= inverse) is the
    # camera-to-world rotation stored in the pose.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4x1 vector; de-homogenising yields the translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Per-view dataset: one ground-truth target view with four source views.

    Object folders come from a fixed LVIS split JSON and camera poses from a
    fixed two-stage pose JSON. Each object contributes 6 samples; sample ``k``
    uses pose ``k`` for the target and poses ``6 + 4k .. 6 + 4k + 3`` for its
    source views. All cameras are expressed relative to the target camera.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=(128, 128, 128), batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=None):
        """Load the split and pose metadata and pre-compute all camera matrices.

        Args:
            root_dir: directory containing ``<folder_id>/<uid>/view_*.png``.
            split: ``'train'`` selects the train list; anything else selects ``'val'``.
            n_views: stored as ``self.n_views``.
            img_wh: only validated (multiples of 32); the size used is fixed to (256, 256).
            downSample: resize ratio read by ``read_mask`` / ``_read_depth_pfm``.
            split_filepath, pair_filepath: accepted for interface compatibility; unused.
            N_rays: number of random rays generated per training sample.
            vol_dims: voxel grid dimensions, stored as a float tensor.
            batch_size: stored; used for construct new metas for gru fusion training.
            clean_image: when true, the target mask is passed to the ray generators.
            importance_sample: forwarded to the random ray generator.
            test_ref_views: reference views used for testing; defaults to an empty list.
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # Stored because read_mask() and _read_depth_pfm() read self.downSample;
        # previously the argument was dropped and those methods raised AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # A fresh list per instance avoids sharing one mutable default between datasets.
        self.test_ref_views = [] if test_ref_views is None else test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0_0", "view_0_5", "view_1_7"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])

        self.define_transforms()
        # Flips the Y and Z axes (Blender -> OpenCV camera convention, per the attribute name).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for idx, _img_id in enumerate(self.img_ids):
            pose = self.input_poses[idx]
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Set ``self.transform`` to the PIL-image-to-tensor conversion."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the ``all_*`` lists with one intrinsic/extrinsic/near-far entry per pose."""
        for vid, _img_id in enumerate(self.img_ids):
            intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
            self.all_intrinsics.append(intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(near_far)

    def _read_depth_pfm(self, filename):
        """Load a PFM depth map; return ``(quarter-size, working-size)`` arrays.

        This was originally a first ``read_depth`` definition that the later
        stub of the same name shadowed; it is kept under a private name so the
        code is preserved without a duplicate method definition.
        """
        depth_h = np.array(read_pfm(filename)[0], dtype=np.float32)  # (1200, 1600)
        depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
                             interpolation=cv2.INTER_NEAREST)  # (600, 800)
        depth_h = depth_h[44:556, 80:720]  # (512, 640)
        depth_h = cv2.resize(depth_h, None, fx=self.downSample, fy=self.downSample,
                             interpolation=cv2.INTER_NEAREST)
        depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
                           interpolation=cv2.INTER_NEAREST)

        return depth, depth_h

    def read_mask(self, filename):
        """Read a grayscale mask; return binarised ``(quarter-size, working-size)`` arrays."""
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Return ``(scale_mat, 1 / radius)`` for the region seen by the given cameras.

        ``scale_mat`` is a float32 4x4 matrix with the bounding radius (times
        ``factor``) on the first three diagonal entries and the bounding
        centre in the last column.
        """
        center, radius, _bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Return the number of samples: 6 target views for every listed object."""
        return 6 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder depth reader: ignores its arguments and returns ``None``."""
        pass

    def _load_rgb(self, path):
        """Load an image as a tensor, compositing RGBA onto a white background."""
        # Context manager closes the underlying file once the tensor is built.
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)  # (4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """Assemble one sample: a ground-truth target view plus four source views.

        Args:
            idx: flat sample index; ``idx // 6`` selects the object and
                ``idx % 6`` the target view.

        Returns:
            dict with the stacked per-view tensors, the ``query_*`` entries of
            the target view, scene metadata and the generated ``rays``.

        Raises:
            FileNotFoundError: if the target depth image cannot be read.
        """
        sample = {}
        img_wh = self.img_wh

        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 6]
        idx = idx % 6

        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        obj_dir = os.path.join(self.root_dir, folder_id, uid)

        # ---- target view: its camera frame is used as the reference (world) frame
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(obj_dir, f'view_0_{idx}_gt.png')
        depth_filename = os.path.join(obj_dir, f'view_0_{idx}_gt_depth_mm.png')

        imgs.append(self._load_rgb(img_filename))

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'cannot read depth image: {depth_filename}')
        depth_h = depth_raw.astype(np.uint16) / 1000.0  # stored in millimetres (file name suffix "_mm")
        mask_h = depth_h > 0

        # Convert z-depth to distance along each pixel's ray.
        directions = get_ray_directions(img_wh[1], img_wh[0],
                                        [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)
        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])

        image_perm = 0  # only supervised on reference view
        mask_dilated = None

        # ---- source views: four poses stored after the six target poses
        src_views = range(6 + idx * 4, 6 + (idx + 1) * 4)

        for vid in src_views:
            # NOTE(review): because the range starts at 6, ``vid % 4`` yields
            # 2, 3, 0, 1, so file k is paired with pose (k + 2) % 4 of this
            # group. Confirm against the pose JSON key order; ``(vid - 6) % 4``
            # may have been intended.
            img = self._load_rgb(os.path.join(obj_dir, f'view_0_{idx}_{vid % 4}.png'))
            imgs.append(img)

            # No depth supervision on source views: constant placeholders.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        # ! calculate the new w2cs after scaling
        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, _near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)

            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far bracket a unit sphere around the origin, with a 5% margin.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1
            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside training the target view (index 0) is removed from the input views.
        start_idx = 0 if self.split == 'train' else 1

        view_ids = [idx] + list(src_views)

        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # near_fars is intentionally not sliced, matching the original behaviour.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        query_mask = sample['query_mask'] if self.clean_image else None
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=query_mask)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=query_mask,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_360_2_stage_1_4.py
ADDED
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width
        focal: pair (fx, fy) of focal lengths, indexed as focal[0] and focal[1]
        center: optional pair (cx, cy); defaults to the image centre (W / 2, H / 2)

    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate,
                    with the last component fixed to 1 (not normalised)
    """
    if center is None:
        center = [W / 2, H / 2]

    # Pixel coordinates shifted by +0.5 so that each ray passes through the pixel centre.
    pixels = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # (H, W, 2)
    u, v = pixels.unbind(-1)

    x = (u - center[0]) / focal[0]
    y = (v - center[1]) / focal[1]
    z = torch.ones_like(u)

    return torch.stack([x, y, z], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Args:
        filename: text file holding the projection matrix as space-separated
            rows; only read when ``P`` is None. If the file has exactly four
            lines the first one is skipped.
        P: optional 3x4 projection matrix; when given, ``filename`` is ignored.

    Returns:
        ``(intrinsics, pose)``: a 4x4 matrix whose upper-left 3x3 block is K
        normalised so that ``K[2, 2] == 1``, and a float32 4x4 camera-to-world
        matrix.
    """
    if P is None:
        # Read through a context manager so the file handle is closed promptly.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation; its transpose (= inverse) is the
    # camera-to-world rotation stored in the pose.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4x1 vector; de-homogenising yields the translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-view dataset: each item is one reference (target) view of an object plus
    4 source views, with every camera expressed relative to the reference camera.

    Object ids are read from the LVIS split JSON and camera poses from a pose
    JSON; both paths are hard-coded in __init__. Every object contributes
    `imgs_per_instance` items, one per reference view.
    """

    # number of reference views (and therefore dataset items) per object
    imgs_per_instance = 6

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Inputs:
            root_dir: directory holding the source-view images
                      (<root_dir>/<folder_id>/<uid>/view_0_*.png)
            split: 'train' selects the 'train' object list, anything else 'val'
            n_views: stored only
            img_wh: only validated (multiples of 32); the working size is fixed
                    to (256, 256) below
            downSample: resize factor used by read_mask
            split_filepath, pair_filepath: unused
            N_rays: number of random rays sampled per item when training
            vol_dims: voxel grid dimensions, stored as a float tensor
            batch_size: stored only
            clean_image: if True, the reference mask is passed to the ray generators
            importance_sample: forwarded to the random ray generator
            test_ref_views: stored only
        The list defaults are only read in this class, never mutated.
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask needs this; previously it was never stored, so calling
        # read_mask raised AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0_0", "view_0_5", "view_1_7"
        self.img_wh = (256, 256)  # fixed; the img_wh argument is not used here
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic  # one intrinsic matrix shared by all views
        self.near_far = np.array(meta["near_far"])

        self.define_transforms()
        # flips the y and z axes of the camera frame
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for idx, img_id in enumerate(self.img_ids):
            pose = self.input_poses[idx]
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Set self.transform, which converts a loaded image to a tensor."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-view lists of intrinsics, w2c extrinsics and near/far bounds."""
        for vid, img_id in enumerate(self.img_ids):
            intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
            self.all_intrinsics.append(intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(near_far)

    def read_mask(self, filename):
        """
        Read a mask image as grayscale and binarise it.
        Outputs:
            mask: mask_h resized by a further factor of 0.25, values in {0, 1}
            mask_h: mask resized by self.downSample, values in {0, 1}
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix that maps the normalised space to the current frame.
        Inputs:
            img_hw: [H, W]
            intrinsics, extrinsics, near_fars: per-view camera info handed to
                                               get_boundingbox
            factor: multiplier applied to the bounding radius
        Outputs:
            scale_mat: (4, 4) float32, diag(radius) with the centre as translation
            scale_factor: 1 / radius
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """One item per (object, reference view) pair."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """
        Unused stub. The class used to define read_depth twice; this later
        definition always won, so the earlier pfm-based one was dead code and
        has been removed.
        """
        pass

    def _load_rgb(self, path):
        """Load an image as a tensor; RGBA is alpha-blended onto a white background."""
        # ToTensor reads the pixel data inside the with-block, so the file handle
        # can be closed as soon as the conversion is done.
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)  # (3 or 4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """
        Build the sample for item idx.

        idx // imgs_per_instance selects the object and idx % imgs_per_instance
        the reference view. The returned dict holds the stacked images, depths,
        masks and camera matrices (reference view first), the query_* entries
        for the reference view, the scale/transform matrices and the rays.
        """
        sample = {}

        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
        idx = idx % self.imgs_per_instance

        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        # bound up front instead of inside the source-view loop
        img_wh = self.img_wh

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # all extrinsics are re-expressed relative to the reference camera,
        # so the reference view itself gets (numerically) the identity
        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt.png')
        depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage", folder_id, uid, f'view_0_{idx}_gt_depth_mm.png')

        imgs += [self._load_rgb(img_filename)]

        # the png stores depth in millimetres; convert to metres
        depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        # turn z-depth into distance along the ray: scale each pixel's ray
        # direction (z == 1) by its depth and take the norm
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        depth_h = distance

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsic = self.intrinsic
        intrinsics.append(intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # poses of the 4 source views of reference view idx
        src_views = range(6 + idx * 4, 6 + (idx + 1) * 4)

        for vid in src_views:
            # NOTE(review): because the offset 6 is not a multiple of 4,
            # `vid % 4 + 1` yields the file suffixes 3, 4, 1, 2 for consecutive
            # pose indices. Confirm this matches the ordering of the pose JSON.
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{idx}_{vid % 4 + 1}.png')

            img = self._load_rgb(img_filename)
            imgs += [img]
            # source views have no depth: all-ones depth and mask are used
            depth_h = np.ones(img.shape[1:], dtype=np.float32)
            depths_h.append(depth_h)
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        # ! calculate the new w2cs after scaling
        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far: camera distance to the origin -/+ 1, with a 5% margin
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # when training, the reference view stays among the input views;
        # otherwise it is dropped from them (it is still returned as query_*)
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [idx] + list(src_views)

        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        # c2w of the reference view in the original world frame
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            # all rays of the reference image
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            # N_rays randomly sampled rays of the reference image
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_4_narrow_and_4_2_stage_mix.py
ADDED
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: focal length in pixels, either an indexable pair (fx, fy) or a
               single scalar that is used for both axes
        center: optional principal point (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # Accept both the (fx, fy) pair used by callers in this file and a plain
    # scalar focal length, as the docstring has always advertised.
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        fx = fy = focal

    cent = center if center is not None else [W / 2, H / 2]

    # Pixel-centre coordinates: integer pixel index + 0.5. These are the same
    # float32 values as kornia's
    # create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5.
    xs = torch.arange(W, dtype=torch.float32) + 0.5
    ys = torch.arange(H, dtype=torch.float32) + 0.5
    i = xs.unsqueeze(0).expand(H, W)  # x (column) coordinate of every pixel, (H, W)
    j = ys.unsqueeze(1).expand(H, W)  # y (row) coordinate of every pixel, (H, W)

    # z is fixed to 1, so the directions are NOT unit length.
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a cam2world pose.
    Inputs:
        filename: text file holding the projection matrix as space-separated
                  rows; only read when P is None. If the file has exactly
                  4 lines, the first line is skipped.
        P: optional projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) array whose top-left 3x3 is K scaled so K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # decomposeProjectionMatrix returns the external (world-to-camera) rotation,
    # so its transpose is the rotation part of the cam2world matrix.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4x1 vector; de-homogenise it to get the pose translation.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-view dataset mixing two image sets ("narrow" and "two-stage"): each item
    is one reference view of an object plus 16 source views, with every camera
    expressed relative to the reference camera.

    Every object contributes `imgs_per_instance` (16) items. Local indices 0-7
    use a narrow reference view with a depth map read from disk; local indices
    8-15 use a two-stage reference view whose depth is filled with -1.

    Pose index layout implied by the indexing in __getitem__:
        0-7    narrow reference views
        8-39   narrow source views (4 per reference view)
        40-47  two-stage reference views
        48-79  two-stage source views (4 per reference view)
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Inputs:
            root_dir: stored only; image paths in this class are hard-coded
            split: 'train' selects the 'train' object list, anything else 'val'
            n_views: stored only
            img_wh: only validated (multiples of 32); the working size is fixed
                    to (256, 256) below
            downSample: resize factor used by read_mask
            split_filepath, pair_filepath: unused
            N_rays: number of random rays sampled per item when training
            vol_dims: voxel grid dimensions, stored as a float tensor
            batch_size: stored only
            clean_image: if True, the reference mask is passed to the ray generators
            importance_sample: forwarded to the random ray generator
            test_ref_views: stored only
        The list defaults are only read in this class, never mutated.
        """
        self.root_dir = root_dir
        self.split = split
        self.imgs_per_instance = 16
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask needs this; previously it was never stored, so calling
        # read_mask raised AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path_narrow, 'r') as f:
            narrow_meta = json.load(f)

        pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
        with open(pose_json_path_two_stage, 'r') as f:
            two_stage_meta = json.load(f)

        # narrow poses first, then two-stage poses.
        # NOTE(review): the original comment gave the sizes as
        # (8 + 8*4) + (8 + 4*4), but __getitem__ indexes two-stage source views
        # 48..79, i.e. 8*4 of them. Confirm against the pose JSON.
        self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())
        self.img_wh = (256, 256)  # fixed; the img_wh argument is not used here
        self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
        intrinsic = np.eye(4)
        assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
        self.intrinsic = intrinsic  # one intrinsic matrix shared by all views
        assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
        self.near_far = np.array(narrow_meta["near_far"])
        self.near_far[1] = 1.8  # far bound from the JSON is overridden
        self.define_transforms()
        # flips the y and z axes of the camera frame
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for idx, img_id in enumerate(self.img_ids):
            pose = self.input_poses[idx]
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)

        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Set self.transform, which converts a loaded image to a tensor."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-view lists of intrinsics, w2c extrinsics and near/far bounds."""
        for vid, img_id in enumerate(self.img_ids):
            intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
            self.all_intrinsics.append(intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(near_far)

    def read_mask(self, filename):
        """
        Read a mask image as grayscale and binarise it.
        Outputs:
            mask: mask_h resized by a further factor of 0.25, values in {0, 1}
            mask_h: mask resized by self.downSample, values in {0, 1}
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix that maps the normalised space to the current frame.
        Inputs:
            img_hw: [H, W]
            intrinsics, extrinsics, near_fars: per-view camera info handed to
                                               get_boundingbox
            factor: multiplier applied to the bounding radius
        Outputs:
            scale_mat: (4, 4) float32, diag(radius) with the centre as translation
            scale_factor: 1 / radius
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """One item per (object, reference view) pair."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """
        Unused stub. The class used to define read_depth twice (both as stubs);
        this later definition always won, so only it is kept.
        """
        pass

    def _load_rgb(self, path):
        """Load an image as a tensor; RGBA is alpha-blended onto a white background."""
        # ToTensor reads the pixel data inside the with-block, so the file handle
        # can be closed as soon as the conversion is done.
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)  # (3 or 4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """
        Build the sample for item idx.

        idx // imgs_per_instance selects the object; the local index
        idx % imgs_per_instance selects the branch (0-7 narrow, 8-15 two-stage)
        and the reference view. Source views are taken from the 4 view groups
        whose number has the same parity as idx. The returned dict holds the
        stacked images, depths, masks and camera matrices (reference view
        first), the query_* entries for the reference view, the scale/transform
        matrices and the rays.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]

        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        # bound up front instead of inside the source-view loops
        img_wh = self.img_wh

        # view groups used as source views: those with the same parity as idx
        if idx % 2 == 0:
            valid_list = [0, 2, 4, 6]
        else:
            valid_list = [1, 3, 5, 7]

        if idx % 16 < 8:
            # ---- narrow branch: reference view with depth read from disk ----
            idx = idx % 16  # [0, 7]
            # target view
            c2w = self.c2ws[idx]
            w2c = np.linalg.inv(c2w)
            w2c_ref = w2c
            w2c_ref_inv = np.linalg.inv(w2c_ref)

            # all extrinsics are re-expressed relative to the reference camera,
            # so the reference view itself gets (numerically) the identity
            w2cs.append(w2c @ w2c_ref_inv)
            c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

            img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')
            depth_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png')

            imgs += [self._load_rgb(img_filename)]

            # the png stores depth in millimetres; convert to metres
            depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
            mask_h = depth_h > 0
            # turn z-depth into distance along the ray: scale each pixel's ray
            # direction (z == 1) by its depth and take the norm
            directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
            surface_points = directions * depth_h[..., None]  # [H, W, 3]
            distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
            depth_h = distance

            depths_h.append(depth_h)
            masks_h.append(mask_h)

            intrinsic = self.intrinsic
            intrinsics.append(intrinsic)

            near_fars.append(self.near_fars[idx])
            image_perm = 0  # only supervised on reference view

            mask_dilated = None

            src_views = range(8, 8 + 8 * 4)
            src_views_used = []
            for vid in src_views:
                view_dix_to_use = (vid - 8) // 4
                if view_dix_to_use not in valid_list:
                    continue
                src_views_used.append(vid)
                img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

                img = self._load_rgb(img_filename)
                imgs += [img]
                # source views have no depth: all-ones depth and mask are used
                depth_h = np.ones(img.shape[1:], dtype=np.float32)
                depths_h.append(depth_h)
                masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

                near_fars.append(self.all_near_fars[vid])
                intrinsics.append(self.all_intrinsics[vid])

                w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        else:
            # ---- two-stage branch: no depth available, depth is set to -1 ----
            idx = idx % 16 - 8  # [0, 7]

            c2w = self.c2ws[idx + 40]
            w2c = np.linalg.inv(c2w)
            w2c_ref = w2c
            w2c_ref_inv = np.linalg.inv(w2c_ref)

            w2cs.append(w2c @ w2c_ref_inv)
            c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

            img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')

            img = self._load_rgb(img_filename)
            imgs += [img]

            depth_h = torch.full((img.shape[1], img.shape[2]), -1.0, dtype=torch.float32)
            mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
            depths_h.append(depth_h)
            masks_h.append(mask_h)

            intrinsic = self.intrinsic
            intrinsics.append(intrinsic)

            # Index the two-stage reference pose (offset 40), consistent with
            # c2ws above. Every entry of near_fars is the same array, so the
            # value is unchanged from the previous un-offset lookup.
            near_fars.append(self.near_fars[idx + 40])
            image_perm = 0  # only supervised on reference view

            mask_dilated = None

            src_views = range(40 + 8, 40 + 8 + 32)
            src_views_used = []
            for vid in src_views:
                view_dix_to_use = (vid - 40 - 8) // 4
                if view_dix_to_use not in valid_list:
                    continue
                src_views_used.append(vid)
                # NOTE(review): the file name uses the reference view `idx` for
                # every source view, while the pose (all_extrinsics[vid]) comes
                # from view group (vid - 48) // 4. The narrow branch uses the
                # group index in the file name; confirm whether `idx` is
                # intended here.
                img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_{(vid-48) % 4 + 1}.png')

                img = self._load_rgb(img_filename)
                imgs += [img]
                depth_h = torch.full((img.shape[1], img.shape[2]), -1.0, dtype=torch.float32)
                depths_h.append(depth_h)
                masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

                near_fars.append(self.all_near_fars[vid])
                intrinsics.append(self.all_intrinsics[vid])

                w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        # ! calculate the new w2cs after scaling
        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far: camera distance to the origin -/+ 1, with a 5% margin
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # when training, the reference view stays among the input views;
        # otherwise it is dropped from them (it is still returned as query_*)
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        # first entry is the local item index (0-15), the rest are pose indices
        view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        if view_ids[0] < 8:
            meta_end = "_narrow" + "_refview" + str(view_ids[0])
        else:
            meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
        # NOTE(review): "_refview" appears twice in the resulting string
        # (e.g. "<folder>_<uid>_refview_narrow_refview3"). Left unchanged in
        # case downstream code relies on the exact name.
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        # c2w of the reference view in the original world frame
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            # all rays of the reference image
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            # N_rays randomly sampled rays of the reference image
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_4_narrow_and_6_2_stage_mix.py
ADDED
@@ -0,0 +1,476 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions of a pinhole camera, in camera coordinates.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: indexable pair (fx, fy) of focal lengths
        center: optional indexable pair (cx, cy) of the principal point;
                when omitted the image centre (W / 2, H / 2) is used
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
                    The z component is always 1, i.e. the vectors are NOT
                    normalised to unit length.
    """
    # Pixel coordinates in (x, y) order. 0.5 is added so each ray passes through
    # the centre of its pixel rather than the pixel's corner.
    # (Background on this convention: https://github.com/bmild/nerf/issues/24)
    pixel_xy = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # batch dim dropped: HxWx2

    px, py = pixel_xy.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_cam = (px - cx) / focal[0]
    y_cam = (py - cy) / focal[1]
    z_cam = torch.ones_like(px)

    return torch.stack([x_cam, y_cam, z_cam], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
                  space-separated rows; only read when ``P`` is None. A file
                  with exactly four lines is treated as having one header line,
                  which is dropped.
        P: optional projection matrix (3x4). When given, ``filename`` is ignored.
    Outputs:
        intrinsics: (4, 4) array; the top-left 3x3 is K scaled so K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # (the previous bare open(...).read() left that to the garbage collector).
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        # Keep exactly the first four tokens of every row; a short row raises
        # IndexError instead of silently producing a ragged matrix.
        lines = [[tok[0], tok[1], tok[2], tok[3]] for tok in (row.split(" ") for row in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    # Normalise the calibration matrix so its bottom-right entry is 1.
    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is used as the world-to-camera rotation; a rotation's inverse is its
    # transpose, which gives the camera-to-world rotation stored in the pose.
    pose[:3, :3] = R.transpose()
    # t is a 4x1 homogeneous vector; de-homogenise it to get the translation
    # column of the camera-to-world matrix.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-reference-view dataset mixing "narrow" and "two-stage" image sets.

    Every object listed in the LVIS split file contributes 12 samples:
      * local index 0..7  -> one of the 8 narrow views is the reference, with
        16 narrow source images (every other group of 4);
      * local index 8..11 -> one of the two-stage views ``[0, 2, 3, 5]`` is the
        reference, with 16 two-stage source images.

    All camera poses are re-expressed relative to the reference camera, then
    rescaled with the matrix returned by ``cal_scale_mat``.

    NOTE: data locations are hard-coded absolute paths (see the class
    constants below); ``root_dir`` is stored but not used to locate files.
    """

    # Hard-coded dataset locations.
    _LVIS_SPLIT_JSON = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    _NARROW_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    _TWO_STAGE_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred_pose.json"
    _NARROW_DIR = "/objaverse-processed/zero12345_img/zero12345_narrow/"
    _TWO_STAGE_DIR = "/objaverse-processed/zero12345_img/zero12345_2stage_5pred/"

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Inputs:
            root_dir: stored on the instance; not used to locate data
            split: 'train' selects the train list; anything else selects 'val'
            n_views: stored on the instance
            img_wh: only validated (both entries must be multiples of 32);
                    the working resolution is fixed to (256, 256)
            downSample: resize factor used by ``read_mask``
            split_filepath, pair_filepath: accepted for interface
                    compatibility; unused
            N_rays: number of random rays sampled per training item
            vol_dims: voxel grid dimensions (copied into a tensor, never mutated)
            batch_size: stored on the instance
            clean_image: when True the query mask is passed to the ray generators
            importance_sample: forwarded to the random ray generator
            test_ref_views: stored on the instance (copied)
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() reads self.downSample; it was previously never stored,
        # so calling read_mask() raised AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Copy so the shared mutable default list is never aliased by instances.
        self.test_ref_views = list(test_ref_views)  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        with open(self._LVIS_SPLIT_JSON, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        with open(self._NARROW_POSE_JSON, 'r') as f:
            narrow_meta = json.load(f)

        with open(self._TWO_STAGE_POSE_JSON, 'r') as f:
            two_stage_meta = json.load(f)

        # Narrow cameras first, then two-stage cameras. The global indices used
        # in __getitem__ (8, 40, 46, ...) rely on this ordering.
        self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())  # (8 + 8*4) + (6 + 6*4)
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
        intrinsic = np.eye(4)
        assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
        self.intrinsic = intrinsic
        assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
        self.near_far = np.array(narrow_meta["near_far"])
        self.near_far[1] = 1.8  # override the far bound read from the json
        self.define_transforms()
        # Flips the y and z camera axes (Blender -> OpenCV camera convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for idx, img_id in enumerate(self.img_ids):
            pose = self.input_poses[idx]
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)

        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> tensor transform used for every image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-camera intrinsic / extrinsic (w2c) / near-far lists."""
        for vid, img_id in enumerate(self.img_ids):
            intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
            self.all_intrinsics.append(intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(near_far)

    def read_mask(self, filename):
        """
        Read a mask image and return binary masks at two resolutions.

        Outputs:
            mask: the ``downSample``-resized mask further reduced by 0.25
            mask_h: the ``downSample``-resized mask
        Both have every non-zero pixel set to 1.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the scale matrix from the bounding region of the given cameras.

        Outputs:
            scale_mat: (4, 4) float32 matrix with ``radius * factor`` on the
                       diagonal and the region centre as translation
            scale_factor: ``1 / (radius * factor)``, used to rescale depths
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """12 samples per object: 8 narrow references + 4 two-stage references."""
        return 12*len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder; depth maps are read inline in ``__getitem__``."""
        # A second, shadowed ``read_depth(self, filename)`` stub used to precede
        # this one. It was dead code and has been removed.
        pass

    def _load_rgb_on_white(self, img_filename):
        """Load an image as a tensor, compositing any alpha channel onto white."""
        # The context manager closes the underlying file once it is converted.
        with Image.open(img_filename) as pil_img:
            img = self.transform(pil_img)  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """
        Assemble one sample: a reference (query) view plus its source views.

        The returned dict holds stacked images, depths, masks, cameras
        (relative to the reference camera and rescaled), bookkeeping fields
        and the rays generated for the query view. For non-train splits the
        reference view is removed from the stacked per-view entries.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views
        idx_original = idx

        folder_uid_dict = self.lvis_paths[idx // 12]

        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        img_wh = self.img_wh
        image_perm = 0  # only supervised on reference view
        mask_dilated = None
        src_views_used = []

        if idx % 12 < 8:
            # ---- narrow image as reference ----
            idx = idx % 12  # [0, 7]
            # target view
            c2w = self.c2ws[idx]
            w2c = np.linalg.inv(c2w)
            w2c_ref = w2c
            w2c_ref_inv = np.linalg.inv(w2c_ref)

            # Reference camera expressed in its own frame (identity up to
            # floating point error).
            w2cs.append(w2c @ w2c_ref_inv)
            c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

            img_filename = os.path.join(self._NARROW_DIR, folder_id, uid, f'view_{idx}.png')
            depth_filename = os.path.join(self._NARROW_DIR, folder_id, uid, f'view_{idx}_depth_mm.png')

            imgs += [self._load_rgb_on_white(img_filename)]

            # Stored depth is divided by 1000 (file name suggests millimetres).
            depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
            mask_h = depth_h > 0
            # Convert the depth value into the distance along each pixel's ray.
            directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
            surface_points = directions * depth_h[..., None]  # [H, W, 3]
            distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
            depth_h = distance

            depths_h.append(depth_h)
            masks_h.append(mask_h)

            intrinsics.append(self.intrinsic)

            near_fars.append(self.near_fars[idx])

            # Global camera indices 8..39 are the 8 groups of 4 narrow source
            # views; keep only the groups whose parity matches the reference.
            src_views = range(8, 8 + 8 * 4)
            for vid in src_views:
                if (vid // 4) % 2 != idx % 2:
                    continue
                src_views_used.append(vid)
                img_filename = os.path.join(self._NARROW_DIR, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

                img = self._load_rgb_on_white(img_filename)

                imgs += [img]
                # Source views carry no ground-truth depth: placeholder ones.
                depth_h = np.ones(img.shape[1:], dtype=np.float32)
                depths_h.append(depth_h)
                masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

                near_fars.append(self.all_near_fars[vid])
                intrinsics.append(self.all_intrinsics[vid])

                w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        else:
            # ---- two-stage image as reference ----
            idx = idx % 12 - 8  # position in valid_list: [0, 3]
            valid_list = [0, 2, 3, 5]
            idx = valid_list[idx]  # two-stage view index: one of 0, 2, 3, 5
            # Two-stage cameras start at global index 40.
            c2w = self.c2ws[idx + 40]
            w2c = np.linalg.inv(c2w)
            w2c_ref = w2c
            w2c_ref_inv = np.linalg.inv(w2c_ref)

            w2cs.append(w2c @ w2c_ref_inv)
            c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

            img_filename = os.path.join(self._TWO_STAGE_DIR, folder_id, uid, f'view_0_{idx}_0.png')

            img = self._load_rgb_on_white(img_filename)
            imgs += [img]

            # No depth is loaded for two-stage images: fill with -1.
            depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
            depth_h = depth_h.fill_(-1.0)
            mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
            depths_h.append(depth_h)
            masks_h.append(mask_h)

            intrinsics.append(self.intrinsic)

            near_fars.append(self.near_fars[idx])

            # Global camera indices 46..69 are the 6 groups of 4 two-stage
            # source views; keep only the groups listed in valid_list.
            src_views = range(40+6, 40+6+24)
            for vid in src_views:
                view_dix_to_use = (vid - 40 - 6) // 4
                if view_dix_to_use not in valid_list:
                    continue
                src_views_used.append(vid)
                # NOTE(review): the file name is built from the reference index
                # `idx`, not from `view_dix_to_use`, so every selected group
                # re-reads the reference group's four images while the pose
                # comes from `vid`. Confirm this is intended.
                img_filename = os.path.join(self._TWO_STAGE_DIR, folder_id, uid, f'view_0_{idx}_{(vid-46) % 4 + 1}.png')

                img = self._load_rgb_on_white(img_filename)
                imgs += [img]
                depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
                depth_h = depth_h.fill_(-1.0)
                depths_h.append(depth_h)
                masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

                near_fars.append(self.all_near_fars[vid])
                intrinsics.append(self.all_intrinsics[vid])

                w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far are re-derived from the camera's distance to the origin
            # (+/- 1, with a 5% margin); the stored near_far is not used here.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # Training keeps the reference view among the inputs; other splits drop it.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [idx_original % 12] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        if view_ids[0] < 8:
            meta_end = "_narrow"+ "_refview" + str(view_ids[0])
        else:
            meta_end = "_two_stage"+ "_refview" + str(view_ids[0] - 8)
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render (captured before the reference view is sliced off)
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_6_narrow_and_6_2_stage_blend_mix.py
ADDED
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions of a pinhole camera, in camera coordinates.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: indexable pair (fx, fy) of focal lengths
        center: optional indexable pair (cx, cy) of the principal point;
                when omitted the image centre (W / 2, H / 2) is used
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
                    The z component is always 1, i.e. the vectors are NOT
                    normalised to unit length.
    """
    # Pixel coordinates in (x, y) order. 0.5 is added so each ray passes through
    # the centre of its pixel rather than the pixel's corner.
    # (Background on this convention: https://github.com/bmild/nerf/issues/24)
    pixel_xy = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # batch dim dropped: HxWx2

    px, py = pixel_xy.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_cam = (px - cx) / focal[0]
    y_cam = (py - cy) / focal[1]
    z_cam = torch.ones_like(px)

    return torch.stack([x_cam, y_cam, z_cam], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
                  space-separated rows; only read when ``P`` is None. A file
                  with exactly four lines is treated as having one header line,
                  which is dropped.
        P: optional projection matrix (3x4). When given, ``filename`` is ignored.
    Outputs:
        intrinsics: (4, 4) array; the top-left 3x3 is K scaled so K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # (the previous bare open(...).read() left that to the garbage collector).
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        # Keep exactly the first four tokens of every row; a short row raises
        # IndexError instead of silently producing a ragged matrix.
        lines = [[tok[0], tok[1], tok[2], tok[3]] for tok in (row.split(" ") for row in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    # Normalise the calibration matrix so its bottom-right entry is 1.
    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is used as the world-to-camera rotation; a rotation's inverse is its
    # transpose, which gives the camera-to-world rotation stored in the pose.
    pose[:3, :3] = R.transpose()
    # t is a 4x1 homogeneous vector; de-homogenise it to get the translation
    # column of the camera-to-world matrix.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
|
64 |
+
split_filepath=None, pair_filepath=None,
|
65 |
+
N_rays=512,
|
66 |
+
vol_dims=[128, 128, 128], batch_size=1,
|
67 |
+
clean_image=False, importance_sample=False, test_ref_views=[]):
|
68 |
+
|
69 |
+
self.root_dir = root_dir
|
70 |
+
self.split = split
|
71 |
+
if self.split == 'train':
|
72 |
+
self.imgs_per_instance = 12
|
73 |
+
else:
|
74 |
+
self.imgs_per_instance = 16
|
75 |
+
self.n_views = n_views
|
76 |
+
self.N_rays = N_rays
|
77 |
+
self.batch_size = batch_size # - used for construct new metas for gru fusion training
|
78 |
+
|
79 |
+
self.clean_image = clean_image
|
80 |
+
self.importance_sample = importance_sample
|
81 |
+
self.test_ref_views = test_ref_views # used for testing
|
82 |
+
self.scale_factor = 1.0
|
83 |
+
self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
|
84 |
+
|
85 |
+
lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json' # folder_id and uid
|
86 |
+
with open(lvis_json_path, 'r') as f:
|
87 |
+
lvis_paths = json.load(f)
|
88 |
+
if self.split == 'train':
|
89 |
+
self.lvis_paths = lvis_paths['train']
|
90 |
+
else:
|
91 |
+
self.lvis_paths = lvis_paths['val']
|
92 |
+
if img_wh is not None:
|
93 |
+
assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
|
94 |
+
'img_wh must both be multiples of 32!'
|
95 |
+
|
96 |
+
|
97 |
+
pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
|
98 |
+
with open(pose_json_path_narrow, 'r') as f:
|
99 |
+
narrow_meta = json.load(f)
|
100 |
+
|
101 |
+
pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
|
102 |
+
with open(pose_json_path_two_stage, 'r') as f:
|
103 |
+
two_stage_meta = json.load(f)
|
104 |
+
|
105 |
+
|
106 |
+
self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys()) # (8 + 8*4) + (8 + 4*4)
|
107 |
+
self.img_wh = (256, 256)
|
108 |
+
self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
|
109 |
+
intrinsic = np.eye(4)
|
110 |
+
assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
|
111 |
+
intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
|
112 |
+
self.intrinsic = intrinsic
|
113 |
+
assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
|
114 |
+
self.near_far = np.array(narrow_meta["near_far"])
|
115 |
+
self.near_far[1] = 1.8
|
116 |
+
self.define_transforms()
|
117 |
+
self.blender2opencv = np.array(
|
118 |
+
[[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
|
119 |
+
)
|
120 |
+
|
121 |
+
|
122 |
+
self.c2ws = []
|
123 |
+
self.w2cs = []
|
124 |
+
self.near_fars = []
|
125 |
+
for idx, img_id in enumerate(self.img_ids):
|
126 |
+
pose = self.input_poses[idx]
|
127 |
+
c2w = pose @ self.blender2opencv
|
128 |
+
self.c2ws.append(c2w)
|
129 |
+
self.w2cs.append(np.linalg.inv(c2w))
|
130 |
+
self.near_fars.append(self.near_far)
|
131 |
+
|
132 |
+
|
133 |
+
|
134 |
+
self.c2ws = np.stack(self.c2ws, axis=0)
|
135 |
+
self.w2cs = np.stack(self.w2cs, axis=0)
|
136 |
+
|
137 |
+
|
138 |
+
self.all_intrinsics = [] # the cam info of the whole scene
|
139 |
+
self.all_extrinsics = []
|
140 |
+
self.all_near_fars = []
|
141 |
+
self.load_cam_info()
|
142 |
+
|
143 |
+
# * bounding box for rendering
|
144 |
+
self.bbox_min = np.array([-1.0, -1.0, -1.0])
|
145 |
+
self.bbox_max = np.array([1.0, 1.0, 1.0])
|
146 |
+
|
147 |
+
# - used for cost volume regularization
|
148 |
+
self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
|
149 |
+
self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
150 |
+
|
151 |
+
|
152 |
+
def define_transforms(self):
|
153 |
+
self.transform = T.Compose([T.ToTensor()])
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
def load_cam_info(self):
|
158 |
+
for vid, img_id in enumerate(self.img_ids):
|
159 |
+
intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
|
160 |
+
self.all_intrinsics.append(intrinsic)
|
161 |
+
self.all_extrinsics.append(extrinsic)
|
162 |
+
self.all_near_fars.append(near_far)
|
163 |
+
|
164 |
+
def read_depth(self, filename):
|
165 |
+
pass
|
166 |
+
|
167 |
+
def read_mask(self, filename):
|
168 |
+
mask_h = cv2.imread(filename, 0)
|
169 |
+
mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
|
170 |
+
interpolation=cv2.INTER_NEAREST)
|
171 |
+
mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
|
172 |
+
interpolation=cv2.INTER_NEAREST)
|
173 |
+
|
174 |
+
mask[mask > 0] = 1 # the masks stored in png are not binary
|
175 |
+
mask_h[mask_h > 0] = 1
|
176 |
+
|
177 |
+
return mask, mask_h
|
178 |
+
|
179 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
|
180 |
+
|
181 |
+
center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
|
182 |
+
|
183 |
+
radius = radius * factor
|
184 |
+
scale_mat = np.diag([radius, radius, radius, 1.0])
|
185 |
+
scale_mat[:3, 3] = center.cpu().numpy()
|
186 |
+
scale_mat = scale_mat.astype(np.float32)
|
187 |
+
|
188 |
+
return scale_mat, 1. / radius.cpu().numpy()
|
189 |
+
|
190 |
+
def __len__(self):
|
191 |
+
return self.imgs_per_instance*len(self.lvis_paths)
|
192 |
+
|
193 |
+
|
194 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
|
195 |
+
pass
|
196 |
+
|
197 |
+
|
198 |
+
def __getitem__(self, idx):
    """Assemble one training/validation sample: a reference view plus source views.

    The reference (query) view is either a "narrow" render, for which a depth
    map is read from disk, or a 2-stage prediction, for which depth is filled
    with -1. Source views ``8 .. 39`` are then appended, skipping groups 4
    and 6; each group of four is taken from either the narrow set or the
    2-stage set. All poses are expressed relative to the reference camera and
    rescaled with the matrix returned by ``cal_scale_mat``.

    :param idx: global sample index
    :return: dict with stacked images, depths, masks, camera matrices,
        ``query_*`` entries for the reference view, and ``rays``
    :raises FileNotFoundError: if the reference depth map cannot be read
    """
    sample = {}
    origin_idx = idx
    idx_original = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

    folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']

    narrow_root = "/objaverse-processed/zero12345_img/zero12345_narrow/"
    two_stage_root = "/objaverse-processed/zero12345_img/zero12345_2stage_8/"

    if self.split == 'train':
        # NOTE(review): this remap is applied to the raw global index, so it
        # only ever fires for indices 4, 5, 10 and 11 - confirm this is intended.
        idx = {4: 5, 5: 7, 10: 13, 11: 15}.get(idx, idx)

    if idx % 16 < 8:  # narrow image as target
        idx = idx % 16  # [0, 7]
        c2w = self.c2ws[idx]
        img_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}_depth_mm.png')
    else:
        idx = idx % 16 - 8  # [0, 5]
        c2w = self.c2ws[idx + 40]
        img_filename = os.path.join(two_stage_root, folder_id, uid, f'view_0_{idx}_0.png')
        depth_filename = None

    # target view: every pose below is expressed relative to this camera
    w2c = np.linalg.inv(c2w)
    w2c_ref = w2c
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    img = self._load_rgb_on_white(img_filename)
    imgs += [img]

    if depth_filename is not None:
        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cannot read depth map: {depth_filename}")
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        directions = get_ray_directions(
            self.img_wh[1], self.img_wh[0],
            [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Convert explicitly instead of relying on torch/numpy operator fallback.
        directions = np.asarray(directions)
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        # The stored depth is replaced by the length of (direction * depth).
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]
    else:
        # no depth for the 2-stage reference: fill with -1
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

    depths_h.append(depth_h)
    masks_h.append(mask_h)

    intrinsics.append(self.intrinsic)
    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None

    # Per view group: True -> use narrow image, False -> use 2-stage prediction.
    # NOTE(review): ``origin_idx < 8`` compares the global index - confirm.
    if self.split == 'train':
        if_use_narrow = [np.random.random() > 0.5 for _ in range(8)]
        if_use_narrow[origin_idx % 8] = origin_idx < 8
    else:
        if_use_narrow = [origin_idx < 8 for _ in range(8)]

    img_wh = self.img_wh
    src_views_used = []
    for vid in range(8, 8 + 8 * 4):
        cur_view_id = (vid - 8) // 4
        if cur_view_id in (4, 6):
            continue
        src_views_used.append(vid)

        if if_use_narrow[cur_view_id]:  # choose narrow
            img_filename = os.path.join(narrow_root, folder_id, uid,
                                        f'view_{cur_view_id}_{vid%4}_10.png')
        else:  # choose 2-stage
            img_filename = os.path.join(two_stage_root, folder_id, uid,
                                        f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png')

        img = self._load_rgb_on_white(img_filename)
        imgs += [img]
        depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])
        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)

        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # near/far recomputed from the camera distance to the origin, +/- 1
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1
        new_near_fars.append([0.95 * near, 1.05 * far])

        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics = np.stack(intrinsics)
    w2cs = np.stack(new_w2cs)
    c2ws = np.stack(new_c2ws)
    near_fars = np.stack(new_near_fars)

    # Outside training the reference view is dropped from the stacked inputs.
    start_idx = 0 if self.split == 'train' else 1

    view_ids = [idx_original % self.imgs_per_instance] + src_views_used
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    if view_ids[0] < 8:
        meta_end = "_narrow" + "_refview" + str(view_ids[0])
    else:
        meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

    # - image to render
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    # 'near_fars' is intentionally left unsliced, as in the original code.
    for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                'intrinsics', 'view_ids', 'affine_mats'):
        sample[key] = sample[key][start_idx:]

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    query_mask = sample['query_mask'] if self.clean_image else None
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=query_mask)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=query_mask,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample

def _load_rgb_on_white(self, img_filename):
    """Load an image through ``self.transform`` and blend any alpha channel to RGB.

    A 4-channel result is composited as ``rgb * alpha + (1 - alpha)``, i.e.
    against a background value of 1.
    """
    # The context manager closes the file once the pixels have been converted.
    with Image.open(img_filename) as pil_img:
        img = self.transform(pil_img)  # (4, h, w)
    if img.shape[0] == 4:
        img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
    return img
|
SparseNeuS_demo_v1/data/blender_general_8_2_stage.py
ADDED
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # NOTE: 0.5 is added to every grid coordinate here.
    # see https://github.com/bmild/nerf/issues/24
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    xs, ys = pixel_grid.unbind(-1)

    if center is None:
        center = [W / 2, H / 2]

    x_dir = (xs - center[0]) / focal[0]
    y_dir = (ys - center[1]) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera pose.

    :param filename: text file holding the matrix rows as space-separated
        values; only read when ``P`` is None. If the file has exactly four
        lines, the first one is skipped.
    :param P: optional projection matrix passed to
        ``cv2.decomposeProjectionMatrix``
    :return: ``(intrinsics, pose)``; ``intrinsics`` is a 4x4 matrix whose upper
        left 3x3 block is K normalised by ``K[2, 2]``, ``pose`` is a float32
        4x4 matrix built from the transposed rotation and the dehomogenised
        translation (cam2world, per the original author's note)
    """
    if P is None:
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.transpose()  # ? why need transpose here
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Dataset yielding one narrow reference view plus 32 two-stage source views.

    Each object listed in the LVIS split file contributes
    ``imgs_per_instance`` (8) samples, one per narrow reference view.
    Camera poses and intrinsics are shared by all objects and are read from
    two pose JSON files at construction time.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """Load the split list and the shared camera metadata.

        :param root_dir: stored on the instance; the data paths used below are hard-coded
        :param split: ``'train'`` selects the train list, anything else the val list
        :param n_views: stored on the instance
        :param img_wh: only validated (multiples of 32); ``self.img_wh`` is fixed to (256, 256)
        :param downSample: resize factor used by ``read_mask``
        :param split_filepath: unused
        :param pair_filepath: unused
        :param N_rays: number of random rays sampled per training item
        :param vol_dims: stored as the float tensor ``self.voxel_dims``
        :param batch_size: stored on the instance
        :param clean_image: when true, the query mask is passed to ray generation
        :param importance_sample: forwarded to random ray generation
        :param test_ref_views: stored on the instance
        """
        self.root_dir = root_dir
        self.split = split

        self.imgs_per_instance = 8

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # Previously dropped, which made read_mask fail on self.downSample.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path_narrow, 'r') as f:
            narrow_meta = json.load(f)

        pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
        with open(pose_json_path_two_stage, 'r') as f:
            two_stage_meta = json.load(f)

        # narrow entries first, then two-stage entries: (8 + 8*4) + (8 + 8*4)
        self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())
        self.img_wh = (256, 256)
        self.input_poses = np.array(
            list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
        intrinsic = np.eye(4)
        assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
        self.intrinsic = intrinsic
        assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
        self.near_far = np.array(narrow_meta["near_far"])
        self.near_far[1] = 1.8
        self.define_transforms()
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        c2ws = []
        w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            c2ws.append(c2w)
            w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)

        self.c2ws = np.stack(c2ws, axis=0)
        self.w2cs = np.stack(w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Store a ``Compose`` holding a single ``ToTensor`` step on ``self.transform``."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsic, inverted ``c2ws[vid]`` and near/far for every id in ``img_ids``."""
        for vid, _ in enumerate(self.img_ids):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask, resize it by ``self.downSample`` and again by 0.25, and binarize both.

        :return: tuple ``(mask, mask_h)``
        :raises FileNotFoundError: if OpenCV cannot read ``filename``
        """
        mask_h = cv2.imread(filename, 0)
        if mask_h is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cannot read mask image: {filename}")
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Return ``(scale_mat, 1 / radius)`` from the ``get_boundingbox`` result.

        ``scale_mat`` is a float32 4x4 matrix with ``radius * factor`` on the
        first three diagonal entries and the box center in the last column.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Return ``imgs_per_instance`` samples per entry of ``lvis_paths``."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder; ignores all arguments and returns ``None``.

        The earlier one-argument ``read_depth`` stub was removed: this
        definition already replaced it in the class namespace.
        """
        pass

    def _load_rgb_on_white(self, img_filename):
        """Load an image through ``self.transform`` and blend any alpha channel to RGB.

        A 4-channel result is composited as ``rgb * alpha + (1 - alpha)``,
        i.e. against a background value of 1.
        """
        # The context manager closes the file once the pixels have been converted.
        with Image.open(img_filename) as pil_img:
            img = self.transform(pil_img)  # (4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """Assemble one sample: a narrow reference view plus 32 two-stage source views.

        The reference image and its depth map come from the narrow set. Source
        views ``8 .. 39`` use two-stage images with depth filled with -1. All
        poses are expressed relative to the reference camera and rescaled
        with the matrix returned by ``cal_scale_mat``.

        :param idx: global sample index
        :return: dict with stacked images, depths, masks, camera matrices,
            ``query_*`` entries for the reference view, and ``rays``
        :raises FileNotFoundError: if the reference depth map cannot be read
        """
        sample = {}
        origin_idx = idx
        idx_original = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        narrow_root = "/objaverse-processed/zero12345_img/zero12345_narrow/"
        two_stage_root = "/objaverse-processed/zero12345_img/zero12345_2stage_8/"

        idx = idx % self.imgs_per_instance  # [0, 7]
        # target view: every pose below is expressed relative to this camera
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}_depth_mm.png')

        imgs += [self._load_rgb_on_white(img_filename)]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cannot read depth map: {depth_filename}")
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        directions = get_ray_directions(
            self.img_wh[1], self.img_wh[0],
            [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Convert explicitly instead of relying on torch/numpy operator fallback.
        directions = np.asarray(directions)
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        # The stored depth is replaced by the length of (direction * depth).
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        img_wh = self.img_wh
        src_views_used = []
        for vid in range(8, 8 + 32):
            # Views 8..39 come in groups of four; the group index selects the
            # image set. The path previously used the reference index ``idx``
            # here, leaving this group index unused, so the same four images
            # were paired with all 32 source poses.
            view_idx_to_use = (vid - 8) // 4
            src_views_used.append(vid)
            img_filename = os.path.join(
                two_stage_root, folder_id, uid,
                f'view_0_{view_idx_to_use}_{(vid-8) % 4 + 1}.png')

            img = self._load_rgb_on_white(img_filename)
            imgs += [img]
            # no depth for source views: fill with -1
            depths_h.append(np.full(img.shape[1:], -1.0, dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)

            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far recomputed from the camera distance to the origin, +/- 1
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1
            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside training the reference view is dropped from the stacked inputs.
        start_idx = 0 if self.split == 'train' else 1

        view_ids = [idx_original % self.imgs_per_instance] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        # NOTE(review): view_ids[0] is in [0, 7], so this suffix is always
        # negative, and the reference is a narrow view - kept unchanged because
        # downstream code may depend on the string; confirm.
        meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # 'near_fars' is intentionally left unsliced, as in the original code.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        query_mask = sample['query_mask'] if self.clean_image else None
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=query_mask)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=query_mask,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_8_4_gt.py
ADDED
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # NOTE: 0.5 is added to every grid coordinate here.
    # see https://github.com/bmild/nerf/issues/24
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    xs, ys = pixel_grid.unbind(-1)

    if center is None:
        center = [W / 2, H / 2]

    x_dir = (xs - center[0]) / focal[0]
    y_dir = (ys - center[1]) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera pose.

    :param filename: text file holding the matrix rows as space-separated
        values; only read when ``P`` is None. If the file has exactly four
        lines, the first one is skipped.
    :param P: optional projection matrix passed to
        ``cv2.decomposeProjectionMatrix``
    :return: ``(intrinsics, pose)``; ``intrinsics`` is a 4x4 matrix whose upper
        left 3x3 block is K normalised by ``K[2, 2]``, ``pose`` is a float32
        4x4 matrix built from the transposed rotation and the dehomogenised
        translation (cam2world, per the original author's note)
    """
    if P is None:
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    pose[:3, :3] = R.transpose()  # ? why need transpose here
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-view dataset: one reference (target) view plus 32 source views.

    Every object listed in the LVIS split file contributes
    ``imgs_per_instance`` (8) samples, one per narrow reference view. Each
    sample holds the reference view (with a depth map read from disk) followed
    by the 32 ``*_10_gt.png`` source views, all expressed in the coordinate
    frame of the reference camera and rescaled by a per-sample ``scale_mat``.

    The data locations are hard-coded (see the class constants below);
    ``root_dir`` is stored but not used to build any path.
    """

    # Hard-coded locations of the pre-rendered dataset.
    LVIS_SPLIT_JSON = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    NARROW_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    TWO_STAGE_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
    NARROW_IMG_DIR = "/objaverse-processed/zero12345_img/zero12345_narrow/"

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Load the split list and camera metadata.

        Inputs:
            root_dir: stored on the instance; not used for path construction
            split: 'train' selects the training objects, anything else the
                'val' objects
            n_views, N_rays, batch_size, clean_image, importance_sample:
                stored on the instance for later use
            img_wh: only validated (both sides must be multiples of 32); the
                working resolution is fixed to (256, 256)
            downSample: resize factor used by ``read_mask``
            split_filepath, pair_filepath: accepted for interface
                compatibility; unused here
            vol_dims: voxel grid dimensions used for cost volume regularization
            test_ref_views: reference views used for testing (copied)
        """
        self.root_dir = root_dir
        self.split = split

        self.imgs_per_instance = 8

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() relies on this attribute; it was previously never set.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Copy so the shared mutable default argument is never aliased.
        self.test_ref_views = list(test_ref_views)  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        with open(self.LVIS_SPLIT_JSON, 'r') as f:
            lvis_paths = json.load(f)
        self.lvis_paths = lvis_paths['train'] if self.split == 'train' else lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        with open(self.NARROW_POSE_JSON, 'r') as f:
            narrow_meta = json.load(f)

        with open(self.TWO_STAGE_POSE_JSON, 'r') as f:
            two_stage_meta = json.load(f)

        # Narrow poses come first, two-stage poses are appended after them.
        self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())  # (8 + 8*4) + (8 + 8*4)
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
        intrinsic = np.eye(4)
        assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
        self.intrinsic = intrinsic
        assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
        # Force a float dtype so the far-plane override below cannot be
        # truncated if the JSON happens to store integers.
        self.near_far = np.array(narrow_meta["near_far"], dtype=np.float64)
        self.near_far[1] = 1.8
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)

        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL -> tensor transform applied to every loaded image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far for every pose."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_depth(self, filename, near_bound=None, noisy_factor=1.0):
        """
        Placeholder, not implemented: always returns None.

        The class used to define this method twice with different signatures
        (the later one silently replaced the earlier); this single definition
        accepts both call shapes.
        """
        pass

    def read_mask(self, filename):
        """
        Read a mask image and binarize it.

        Outputs:
            mask: the mask resized by ``downSample`` and then by a further 0.25
            mask_h: the mask resized by ``downSample`` only
        Both are set to 1 wherever the stored value is > 0.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the scale matrix from the bounding volume of the given cameras.

        Outputs:
            scale_mat: (4, 4) float32 matrix with ``radius * factor`` on the
                diagonal and the bounding-volume center as translation
            scale_factor: ``1 / (radius * factor)``
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Number of samples: one per reference view of every object."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def _load_rgb(self, filename):
        """Load an image as a (3, H, W) tensor, compositing any alpha channel onto white."""
        # Context manager so the underlying file is closed once decoded.
        with Image.open(filename) as pil_img:
            img = self.transform(pil_img)  # (3 or 4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """
        Build the sample for dataset index ``idx``.

        ``idx // imgs_per_instance`` selects the object and
        ``idx % imgs_per_instance`` the narrow reference view. Returns a dict
        with the stacked images, depths, masks, camera matrices, the query
        (reference) view entries and the generated rays.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        instance_dir = os.path.join(self.NARROW_IMG_DIR, folder_id, uid)

        idx = idx % self.imgs_per_instance  # [0, 7]
        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # All extrinsics are expressed relative to the reference camera.
        w2cs.append(w2c @ w2c_ref_inv)

        imgs.append(self._load_rgb(os.path.join(instance_dir, f'view_{idx}.png')))

        depth_filename = os.path.join(instance_dir, f'view_{idx}_depth_mm.png')
        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'cannot read depth image: {depth_filename}')
        depth_h = depth_raw.astype(np.uint16) / 1000.0  # file stores mm (per its name)
        mask_h = depth_h > 0
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Convert z-depth into distance along each ray.
        surface_points = directions.numpy() * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        img_wh = self.img_wh
        src_views = range(8, 8 + 32)
        src_views_used = []
        for vid in src_views:
            src_views_used.append(vid)
            img = self._load_rgb(
                os.path.join(instance_dir, f'view_{(vid - 8) // 4}_{vid%4}_10_gt.png'))
            imgs.append(img)
            # Source views have no depth: filled with -1, and a full mask.
            depths_h.append(np.full(img.shape[1:], -1.0, dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far are placed one unit either side of the camera's
            # distance to the origin, then widened by 5%.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # During training the reference view stays in the input set; otherwise
        # it is dropped from the inputs and only used as the query.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        # NOTE(review): the reference view here is always a narrow view
        # (view_ids[0] in [0, 7]), yet the tag says "_two_stage" and the index
        # is negative. Left unchanged because downstream code may depend on
        # the exact string - confirm whether "_narrow" + str(view_ids[0]) was
        # intended.
        meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_3_views.py
ADDED
@@ -0,0 +1,446 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width in pixels
        focal: focal length in pixels, either a pair ``(fx, fy)`` or a single
            scalar used for both axes
        center: optional principal point ``(cx, cy)``; defaults to the image
            center ``(W / 2, H / 2)``
    Outputs:
        directions: (H, W, 3) float32 tensor, the (unnormalized) direction of
            the rays in camera coordinate; the z component is always 1
    """
    # Accept both the (fx, fy) pair used by the callers in this file and a
    # plain scalar focal length.
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        fx = fy = focal

    # Pixel coordinates sampled at pixel centers (hence the +0.5 offset).
    xs = torch.arange(W, dtype=torch.float32) + 0.5
    ys = torch.arange(H, dtype=torch.float32) + 0.5
    i = xs.unsqueeze(0).expand(H, W)  # x (column) coordinate of every pixel
    j = ys.unsqueeze(1).expand(H, W)  # y (row) coordinate of every pixel

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has four lines the first one is skipped.
        P: optional (3, 4) projection matrix; when given, ``filename`` is ignored
    Outputs:
        intrinsics: (4, 4) array whose top-left 3x3 block is K normalized so
            that K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # NOTE(review): OpenCV appears to return the world-to-camera rotation, so
    # its transpose (= inverse) is the camera-to-world rotation - confirm
    # against the OpenCV documentation.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4-vector; dehomogenize it to get the translation part.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-view dataset mixing narrow renders and 2-stage predictions.

    Every object listed in the LVIS split file contributes
    ``imgs_per_instance`` (16) samples: local indices 0-7 use a narrow image
    (with a depth map read from disk) as the reference view, indices 8-15 use
    a 2-stage image (no depth). Each sample adds 3 source views for each of
    the 8 view groups, taken either from the narrow set or from the 2-stage
    set, all expressed in the coordinate frame of the reference camera and
    rescaled by a per-sample ``scale_mat``.

    The data locations are hard-coded (see the class constants below);
    ``root_dir`` is stored but not used to build any path.
    """

    # Hard-coded locations of the pre-rendered dataset.
    LVIS_SPLIT_JSON = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    NARROW_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    TWO_STAGE_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
    NARROW_IMG_DIR = "/objaverse-processed/zero12345_img/zero12345_narrow/"
    TWO_STAGE_IMG_DIR = "/objaverse-processed/zero12345_img/zero12345_2stage_8/"
    # Offset of the first 2-stage pose inside self.c2ws; the narrow poses
    # (8 + 8*4 = 40 of them) are stored first.
    TWO_STAGE_POSE_OFFSET = 40

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Load the split list and camera metadata.

        Inputs:
            root_dir: stored on the instance; not used for path construction
            split: 'train' selects the training objects, anything else the
                'val' objects
            n_views, N_rays, batch_size, clean_image, importance_sample:
                stored on the instance for later use
            img_wh: only validated (both sides must be multiples of 32); the
                working resolution is fixed to (256, 256)
            downSample: resize factor used by ``read_mask``
            split_filepath, pair_filepath: accepted for interface
                compatibility; unused here
            vol_dims: voxel grid dimensions used for cost volume regularization
            test_ref_views: reference views used for testing (copied)
        """
        self.root_dir = root_dir
        self.split = split
        self.imgs_per_instance = 16
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() relies on this attribute; it was previously never set.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Copy so the shared mutable default argument is never aliased.
        self.test_ref_views = list(test_ref_views)  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        with open(self.LVIS_SPLIT_JSON, 'r') as f:
            lvis_paths = json.load(f)
        self.lvis_paths = lvis_paths['train'] if self.split == 'train' else lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        with open(self.NARROW_POSE_JSON, 'r') as f:
            narrow_meta = json.load(f)

        with open(self.TWO_STAGE_POSE_JSON, 'r') as f:
            two_stage_meta = json.load(f)

        # Narrow poses come first, two-stage poses are appended after them.
        self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())  # (8 + 8*4) + (8 + 4*4)
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
        intrinsic = np.eye(4)
        assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
        intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
        self.intrinsic = intrinsic
        assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
        # Force a float dtype so the far-plane override below cannot be
        # truncated if the JSON happens to store integers.
        self.near_far = np.array(narrow_meta["near_far"], dtype=np.float64)
        self.near_far[1] = 1.8
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)

        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL -> tensor transform applied to every loaded image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far for every pose."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_depth(self, filename, near_bound=None, noisy_factor=1.0):
        """
        Placeholder, not implemented: always returns None.

        The class used to define this method twice with different signatures
        (the later one silently replaced the earlier); this single definition
        accepts both call shapes.
        """
        pass

    def read_mask(self, filename):
        """
        Read a mask image and binarize it.

        Outputs:
            mask: the mask resized by ``downSample`` and then by a further 0.25
            mask_h: the mask resized by ``downSample`` only
        Both are set to 1 wherever the stored value is > 0.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the scale matrix from the bounding volume of the given cameras.

        Outputs:
            scale_mat: (4, 4) float32 matrix with ``radius * factor`` on the
                diagonal and the bounding-volume center as translation
            scale_factor: ``1 / (radius * factor)``
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Number of samples: one per reference view of every object."""
        return self.imgs_per_instance * len(self.lvis_paths)

    def _load_rgb(self, filename):
        """Load an image as a (3, H, W) tensor, compositing any alpha channel onto white."""
        # Context manager so the underlying file is closed once decoded.
        with Image.open(filename) as pil_img:
            img = self.transform(pil_img)  # (3 or 4, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def _select_narrow_flags(self, origin_idx):
        """
        Decide, for each of the 8 view groups, whether narrow images are used.

        Returns a list of 8 bools (True = narrow render, False = 2-stage
        prediction). For 'train' the choice is random per group, except that
        the reference view's own group follows the reference type; for any
        other split every group follows the reference type.
        """
        # The reference type depends on the index *within* the object, not on
        # the global dataset index (which is < 8 only for the first object).
        target_is_narrow = origin_idx % self.imgs_per_instance < 8
        if self.split == 'train':
            flags = [bool(np.random.random() > 0.5) for _ in range(8)]
            flags[origin_idx % 8] = target_is_narrow
        else:
            flags = [target_is_narrow] * 8
        return flags

    def __getitem__(self, idx):
        """
        Build the sample for dataset index ``idx``.

        ``idx // imgs_per_instance`` selects the object and
        ``idx % imgs_per_instance`` the reference view (0-7 narrow, 8-15
        2-stage). Returns a dict with the stacked images, depths, masks,
        camera matrices, the query (reference) view entries and the generated
        rays.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        narrow_dir = os.path.join(self.NARROW_IMG_DIR, folder_id, uid)
        two_stage_dir = os.path.join(self.TWO_STAGE_IMG_DIR, folder_id, uid)

        if idx % self.imgs_per_instance < 8:  # narrow image as target
            idx = idx % self.imgs_per_instance  # [0, 7]
            # target view
            c2w = self.c2ws[idx]
            w2c = np.linalg.inv(c2w)
            w2c_ref = w2c
            w2c_ref_inv = np.linalg.inv(w2c_ref)

            # All extrinsics are expressed relative to the reference camera.
            w2cs.append(w2c @ w2c_ref_inv)

            img = self._load_rgb(os.path.join(narrow_dir, f'view_{idx}.png'))
            imgs.append(img)

            depth_filename = os.path.join(narrow_dir, f'view_{idx}_depth_mm.png')
            depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
            if depth_raw is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError(f'cannot read depth image: {depth_filename}')
            depth_h = depth_raw.astype(np.uint16) / 1000.0  # file stores mm (per its name)
            mask_h = depth_h > 0
            directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
            # Convert z-depth into distance along each ray.
            surface_points = directions.numpy() * depth_h[..., None]  # [H, W, 3]
            depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        else:  # 2-stage image as target
            idx = idx % self.imgs_per_instance - 8  # [0, 7]
            c2w = self.c2ws[idx + self.TWO_STAGE_POSE_OFFSET]
            w2c = np.linalg.inv(c2w)
            w2c_ref = w2c
            w2c_ref_inv = np.linalg.inv(w2c_ref)

            w2cs.append(w2c @ w2c_ref_inv)

            img = self._load_rgb(os.path.join(two_stage_dir, f'view_0_{idx}_0.png'))
            imgs.append(img)

            # No depth is available for 2-stage targets: fill with -1 and use
            # a full mask.
            depth_h = np.full(img.shape[1:], -1.0, dtype=np.float32)
            mask_h = np.ones(img.shape[1:], dtype=np.int32)

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None
        if_use_narrow = self._select_narrow_flags(origin_idx)

        # Use the first 3 of the 4 views in every group. (A random choice was
        # drawn here before but was immediately overwritten by this fixed one.)
        src_views = []
        for i in range(8):
            src_views += [8 + i * 4 + local_idx for local_idx in (0, 1, 2)]

        img_wh = self.img_wh
        src_views_used = []
        for vid in src_views:
            src_views_used.append(vid)
            cur_view_id = (vid - 8) // 4
            if if_use_narrow[cur_view_id]:  # choose narrow
                img_filename = os.path.join(narrow_dir, f'view_{cur_view_id}_{vid%4}_10.png')
            else:  # choose 2-stage
                img_filename = os.path.join(two_stage_dir, f'view_0_{(vid - 8) // 4}_{(vid-8) % 4 + 1}.png')

            img = self._load_rgb(img_filename)
            imgs.append(img)
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far are placed one unit either side of the camera's
            # distance to the origin, then widened by 5%.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # During training the reference view stays in the input set; otherwise
        # it is dropped from the inputs and only used as the query.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        if view_ids[0] < 8:
            meta_end = "_narrow" + "_refview" + str(view_ids[0])
        else:
            meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_blend_mix.py
ADDED
@@ -0,0 +1,439 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build one ray direction per pixel, expressed in the camera frame.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: indexable pair, focal[0] scales the x axis and focal[1] the y axis
        center: optional indexable pair (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3) tensor; the z component of every ray is 1
    """
    # Drop the leading batch axis of the kornia grid and shift every coordinate
    # by half a pixel, i.e. rays go through pixel centres.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # HxWx2

    u, v = pixel_grid.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_dir = (u - cx) / focal[0]
    y_dir = (v - cy) / focal[1]
    z_dir = torch.ones_like(u)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Split a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: text file holding the matrix as space-separated rows; only
                  read when ``P`` is None. A 4-line file has its first line dropped.
        P: optional 3x4 projection matrix; when given, ``filename`` is ignored.
    Outputs:
        intrinsics: (4, 4) float64 matrix, K (normalised so K[2, 2] == 1) in the top-left 3x3
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Use a context manager so the file handle is always closed.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    # OpenCV returns (cameraMatrix, rotMatrix, transVect, ...).
    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # rotMatrix is the world->camera rotation; its transpose is camera->world.
    pose[:3, :3] = R.transpose()
    # transVect is the homogeneous camera position; divide by w to dehomogenise.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=[128, 128, 128], batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=[]):
    """
    Load the object split and the camera metadata of the narrow and
    two-stage renderings, then precompute per-view camera matrices.

    Args:
        root_dir: stored on the instance; the data paths below are hard-coded.
        split: 'train' selects the 'train' object list, anything else 'val'.
        n_views: stored on the instance.
        img_wh: only validated (both entries must be multiples of 32);
                self.img_wh is always set to (256, 256).
        downSample: resize factor stored as self.downSample (read by read_mask).
        split_filepath, pair_filepath: accepted but not used here.
        N_rays: number of random rays sampled per training item.
        vol_dims: voxel grid resolution stored as a float tensor (only read,
                  never mutated, so the shared default list is harmless).
        batch_size: stored on the instance.
        clean_image: when True the query mask is forwarded to ray generation.
        importance_sample: forwarded to random ray generation.
        test_ref_views: stored by reference; do not mutate the shared default.
    """
    self.root_dir = root_dir
    self.split = split
    self.imgs_per_instance = 16
    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # Keep the argument: read_mask() reads self.downSample and would raise
    # AttributeError if it were dropped.
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path_narrow, 'r') as f:
        narrow_meta = json.load(f)

    pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
    with open(pose_json_path_two_stage, 'r') as f:
        two_stage_meta = json.load(f)

    # Narrow poses come first, two-stage poses second
    # (original author's note: (8 + 8*4) + (8 + 8*4) entries).
    self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
    intrinsic = np.eye(4)
    assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
    intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
    self.intrinsic = intrinsic
    assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
    # Force a float dtype: if the JSON held integers, assigning 1.8 into an
    # integer array would silently truncate it to 1.
    self.near_far = np.array(narrow_meta["near_far"], dtype=np.float64)
    self.near_far[1] = 1.8
    self.define_transforms()
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    for idx, img_id in enumerate(self.img_ids):
        pose = self.input_poses[idx]
        # Flip the y and z camera axes via the blender2opencv matrix.
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)

    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
147 |
+
|
148 |
+
|
149 |
+
def define_transforms(self):
    """Create the image transform pipeline; it only converts images to tensors."""
    to_tensor = T.ToTensor()
    self.transform = T.Compose([to_tensor])
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
+
def load_cam_info(self):
    """
    Fill the per-view camera lists, one entry per id in self.img_ids.

    Every view shares the same intrinsic matrix and near/far array (the same
    objects are appended each time); the extrinsic is the inverse of the
    view's camera-to-world matrix.
    """
    for view_idx, _ in enumerate(self.img_ids):
        world_to_cam = np.linalg.inv(self.c2ws[view_idx])
        self.all_intrinsics.append(self.intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(self.near_far)
|
160 |
+
|
161 |
+
def read_depth(self, filename):
    """
    Placeholder depth reader: does nothing and returns None.

    NOTE: a later ``read_depth`` definition in this class, with a different
    signature, replaces this one when the class is created.
    """
    return None
|
163 |
+
|
164 |
+
def read_mask(self, filename):
    """
    Read a mask image and return it binarised at two resolutions.

    Args:
        filename: path of the mask image; it is read as single-channel grayscale.
    Returns:
        (mask, mask_h): ``mask_h`` is the image resized by the instance's
        ``downSample`` factor; ``mask`` is ``mask_h`` resized by a further
        0.25. In both, every non-zero pixel is set to 1.
    Raises:
        FileNotFoundError: if OpenCV could not read ``filename``.
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read mask image: {filename}")

    # __init__ in this class does not assign self.downSample; fall back to
    # the constructor's default factor of 1.0 instead of raising AttributeError.
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
175 |
+
|
176 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """
    Build a scale-and-translate matrix from the bounding region of the views.

    The centre and radius come from ``get_boundingbox``; the radius is
    multiplied by ``factor`` before use.

    Returns:
        (scale_mat, inv_radius): a float32 (4, 4) matrix with the scaled
        radius on the first three diagonal entries and the centre in the last
        column, plus the reciprocal of the scaled radius.
    """
    center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

    scaled_radius = radius * factor
    normalizer = np.diag([scaled_radius] * 3 + [1.0])
    normalizer[:3, 3] = center.cpu().numpy()
    inv_radius = 1. / scaled_radius.cpu().numpy()

    return normalizer.astype(np.float32), inv_radius
|
186 |
+
|
187 |
+
def __len__(self):
    """Return the item count: imgs_per_instance entries for each listed object."""
    num_objects = len(self.lvis_paths)
    return self.imgs_per_instance * num_objects
|
189 |
+
|
190 |
+
|
191 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """
    Placeholder depth reader: ignores all arguments and returns None.

    NOTE: this definition replaces the earlier one-argument ``read_depth``
    declared in the same class.
    """
    return None
|
193 |
+
|
194 |
+
|
195 |
+
def __getitem__(self, idx):
    """
    Assemble one sample: a reference (query) view plus 32 source views.

    Each object contributes ``self.imgs_per_instance`` items. Within an
    object, local indices 0-7 use a ground-truth narrow image (with a depth
    map) as the reference; local indices 8-15 use a two-stage image (no
    depth, filled with -1). All poses are expressed relative to the reference
    camera, then normalised with the matrix from ``cal_scale_mat``.

    For the 'train' split each of the 8 source groups is drawn at random from
    the narrow or the two-stage renderings, except the reference's own group,
    which always matches the reference type. Other splits use the reference
    type for every group.

    Returns:
        dict with stacked per-view tensors ('images', 'depths_h', 'masks_h',
        'w2cs', 'c2ws', 'near_fars', 'intrinsics', 'view_ids', 'affine_mats'),
        the 'query_*' entries of the reference view, normalisation data
        ('scale_mat', 'scale_factor', 'trans_mat'), bookkeeping fields, and
        'rays' generated from the reference view. Outside the 'train' split
        the reference is removed from the sliced per-view tensors
        ('near_fars' is not sliced).
    """
    sample = {}
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views
    idx_original = idx

    folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]

    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']

    # Use self.imgs_per_instance instead of a literal 16 so the index
    # arithmetic stays in sync with __len__.
    if idx % self.imgs_per_instance < 8:  # gt image as target
        idx = idx % self.imgs_per_instance  # [0, 7]
        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # The reference is expressed in its own frame, so these are
        # (numerically) identity matrices.
        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')

        depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))

        img = Image.open(img_filename)

        img = self.transform(img)  # (4, h, w)

        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        imgs += [img]

        # Presumably stored in millimetres (per the `_depth_mm` suffix), hence / 1000.
        depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        # print("valid pixels", np.sum(mask_h))
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Ray directions have z == 1, so scaling by the depth map and taking
        # the norm turns the stored depth into a distance along each ray.
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        depth_h = distance

    else:
        idx = idx % self.imgs_per_instance - 8  # [0, 7]
        # Two-stage poses follow the 40 narrow poses in self.c2ws.
        c2w = self.c2ws[idx + 40]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{idx}_0.png')

        img = Image.open(img_filename)
        img = self.transform(img)  # (4, h, w)

        # print("img_pre", img.shape)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        # print("img", img.shape)
        imgs += [img]

        # No depth is loaded for two-stage references: fill depth with -1.
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)

        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
    depths_h.append(depth_h)
    masks_h.append(mask_h)

    intrinsic = self.intrinsic
    intrinsics.append(intrinsic)

    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None
    if_use_narrow = []
    if self.split == 'train':
        for i in range(8):
            if np.random.random() > 0.5:
                if_use_narrow.append(True)  # use narrow
            else:
                if_use_narrow.append(False)  # 2-stage prediction
        # The reference's own group always matches the reference type.
        if_use_narrow[origin_idx % 8] = True if (origin_idx % self.imgs_per_instance) < 8 else False
    else:
        for i in range(8):
            if_use_narrow.append(True if (origin_idx % self.imgs_per_instance) < 8 else False)
    src_views = range(8, 8 + 8 * 4)
    src_views_used = []
    for vid in src_views:
        src_views_used.append(vid)
        cur_view_id = (vid - 8) // 4  # [0, 7]
        # choose narrow
        if if_use_narrow[cur_view_id]:
            img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{cur_view_id}_{vid%4}_10.png')
        else:  # choose 2-stage
            img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_2stage_8/", folder_id, uid, f'view_0_{cur_view_id}_{(vid) % 4 + 1}.png')

        img = Image.open(img_filename)
        img_wh = self.img_wh

        img = self.transform(img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

        imgs += [img]
        # Source views carry no depth: placeholder depth and all-ones mask.
        depth_h = np.ones(img.shape[1:], dtype=np.float32)
        depths_h.append(depth_h)
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])

        # Express the source camera relative to the reference camera.
        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # Near/far bracket a distance of 1 around the origin as seen from the
        # camera, with a 5% margin on each side.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1

        new_near_fars.append([0.95 * near, 1.05 * far])

        new_depths_h.append(depth * scale_factor)

    # print(new_near_fars)
    # print("img numeber: ", len(imgs))
    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
        new_near_fars)

    # Training keeps the reference among the input views; other splits drop it.
    if self.split == 'train':
        start_idx = 0
    else:
        start_idx = 1

    view_ids = [idx_original % self.imgs_per_instance] + src_views_used
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    # sample['light_idx'] = torch.tensor(light_idx)
    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    if view_ids[0] < 8:
        meta_end = "_narrow" + "_refview" + str(view_ids[0])
    else:
        meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
    # NOTE(review): "_refview" appears twice in the resulting string; left
    # unchanged because downstream code may rely on the exact name.
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

    # - image to render
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
    sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
    sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
    sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
    sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
    sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
    sample['view_ids'] = sample['view_ids'][start_idx:]
    sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_8_narrow_and_8_2_stage_mix.py
ADDED
@@ -0,0 +1,470 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build one ray direction per pixel, expressed in the camera frame.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: indexable pair, focal[0] scales the x axis and focal[1] the y axis
        center: optional indexable pair (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3) tensor; the z component of every ray is 1
    """
    # Drop the leading batch axis of the kornia grid and shift every coordinate
    # by half a pixel, i.e. rays go through pixel centres.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # HxWx2

    u, v = pixel_grid.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_dir = (u - cx) / focal[0]
    y_dir = (v - cy) / focal[1]
    z_dir = torch.ones_like(u)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Split a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: text file holding the matrix as space-separated rows; only
                  read when ``P`` is None. A 4-line file has its first line dropped.
        P: optional 3x4 projection matrix; when given, ``filename`` is ignored.
    Outputs:
        intrinsics: (4, 4) float64 matrix, K (normalised so K[2, 2] == 1) in the top-left 3x3
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Use a context manager so the file handle is always closed.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    # OpenCV returns (cameraMatrix, rotMatrix, transVect, ...).
    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # rotMatrix is the world->camera rotation; its transpose is camera->world.
    pose[:3, :3] = R.transpose()
    # transVect is the homogeneous camera position; divide by w to dehomogenise.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=[128, 128, 128], batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=[]):
    """
    Load the object split and the camera metadata of the narrow and
    two-stage renderings, then precompute per-view camera matrices.

    Args:
        root_dir: stored on the instance; the data paths below are hard-coded.
        split: 'train' selects the 'train' object list, anything else 'val'.
        n_views: stored on the instance.
        img_wh: only validated (both entries must be multiples of 32);
                self.img_wh is always set to (256, 256).
        downSample: resize factor stored as self.downSample (read by read_mask).
        split_filepath, pair_filepath: accepted but not used here.
        N_rays: number of random rays sampled per training item.
        vol_dims: voxel grid resolution stored as a float tensor (only read,
                  never mutated, so the shared default list is harmless).
        batch_size: stored on the instance.
        clean_image: when True the query mask is forwarded to ray generation.
        importance_sample: forwarded to random ray generation.
        test_ref_views: stored by reference; do not mutate the shared default.
    """
    self.root_dir = root_dir
    self.split = split
    self.imgs_per_instance = 16
    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # Keep the argument: read_mask() reads self.downSample and would raise
    # AttributeError if it were dropped.
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path_narrow, 'r') as f:
        narrow_meta = json.load(f)

    pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
    with open(pose_json_path_two_stage, 'r') as f:
        two_stage_meta = json.load(f)

    # Narrow poses come first, two-stage poses second
    # (original author's note: (8 + 8*4) + (8 + 8*4) entries).
    self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
    intrinsic = np.eye(4)
    assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
    intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
    self.intrinsic = intrinsic
    assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
    # Force a float dtype: if the JSON held integers, assigning 1.8 into an
    # integer array would silently truncate it to 1.
    self.near_far = np.array(narrow_meta["near_far"], dtype=np.float64)
    self.near_far[1] = 1.8
    self.define_transforms()
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    for idx, img_id in enumerate(self.img_ids):
        pose = self.input_poses[idx]
        # Flip the y and z camera axes via the blender2opencv matrix.
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)

    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
147 |
+
|
148 |
+
|
149 |
+
def define_transforms(self):
    """Create the image transform pipeline; it only converts images to tensors."""
    to_tensor = T.ToTensor()
    self.transform = T.Compose([to_tensor])
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
+
def load_cam_info(self):
    """
    Fill the per-view camera lists, one entry per id in self.img_ids.

    Every view shares the same intrinsic matrix and near/far array (the same
    objects are appended each time); the extrinsic is the inverse of the
    view's camera-to-world matrix.
    """
    for view_idx, _ in enumerate(self.img_ids):
        world_to_cam = np.linalg.inv(self.c2ws[view_idx])
        self.all_intrinsics.append(self.intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(self.near_far)
|
160 |
+
|
161 |
+
def read_depth(self, filename):
    """
    Placeholder depth reader: does nothing and returns None.

    NOTE: a later ``read_depth`` definition in this class, with a different
    signature, replaces this one when the class is created.
    """
    return None
|
163 |
+
|
164 |
+
def read_mask(self, filename):
    """
    Read a mask image and return it binarised at two resolutions.

    Args:
        filename: path of the mask image; it is read as single-channel grayscale.
    Returns:
        (mask, mask_h): ``mask_h`` is the image resized by the instance's
        ``downSample`` factor; ``mask`` is ``mask_h`` resized by a further
        0.25. In both, every non-zero pixel is set to 1.
    Raises:
        FileNotFoundError: if OpenCV could not read ``filename``.
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read mask image: {filename}")

    # __init__ in this class does not assign self.downSample; fall back to
    # the constructor's default factor of 1.0 instead of raising AttributeError.
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
175 |
+
|
176 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """
    Build a scale-and-translate matrix from the bounding region of the views.

    The centre and radius come from ``get_boundingbox``; the radius is
    multiplied by ``factor`` before use.

    Returns:
        (scale_mat, inv_radius): a float32 (4, 4) matrix with the scaled
        radius on the first three diagonal entries and the centre in the last
        column, plus the reciprocal of the scaled radius.
    """
    center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

    scaled_radius = radius * factor
    normalizer = np.diag([scaled_radius] * 3 + [1.0])
    normalizer[:3, 3] = center.cpu().numpy()
    inv_radius = 1. / scaled_radius.cpu().numpy()

    return normalizer.astype(np.float32), inv_radius
|
186 |
+
|
187 |
+
def __len__(self):
    """Return the item count: imgs_per_instance entries for each listed object."""
    num_objects = len(self.lvis_paths)
    return self.imgs_per_instance * num_objects
|
189 |
+
|
190 |
+
|
191 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """
    Placeholder depth reader: ignores all arguments and returns None.

    NOTE: this definition replaces the earlier one-argument ``read_depth``
    declared in the same class.
    """
    return None
|
193 |
+
|
194 |
+
|
195 |
+
def __getitem__(self, idx):
    """
    Assemble one sample: a reference (query) view followed by 32 source views.

    ``idx // self.imgs_per_instance`` selects the object and
    ``idx % self.imgs_per_instance`` selects the reference view:

    * reference ids below 8 use the "narrow" renders: the reference image
      ``view_{i}.png`` with its depth map, and pose-table entries 8..39 as
      source views (source depths are filled with 1);
    * all other ids use the "two-stage" renders: reference pose ``40 + i``,
      pose-table entries 48..79 as source views, and no depth supervision
      (depths are filled with -1).

    Every pose is expressed relative to the reference camera and then
    re-derived after applying the scale matrix from ``cal_scale_mat``.

    Inputs:
        idx: flat sample index
    Outputs:
        sample: dict with the stacked per-view tensors (``images``,
            ``depths_h``, ``masks_h``, ``w2cs``, ``c2ws``, ``near_fars``,
            ``intrinsics``, ``view_ids``, ``affine_mats``), the ``query_*``
            entries of the reference view, ``scale_mat``, ``trans_mat``,
            bookkeeping fields and the generated ``rays``. For splits other
            than ``'train'`` the reference view is dropped from the stacked
            per-view tensors (but kept in the ``query_*`` entries).
    """
    narrow_root = "/objaverse-processed/zero12345_img/zero12345_narrow/"
    two_stage_root = "/objaverse-processed/zero12345_img/zero12345_2stage_8/"

    def load_rgb(path):
        # Load an image as a (C, h, w) tensor; 4-channel images are
        # alpha-composited onto a white background.
        img = self.transform(Image.open(path))
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    sample = {}
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

    folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']

    img_wh = self.img_wh
    image_perm = 0  # only supervised on reference view
    mask_dilated = None
    src_views_used = []

    if idx % self.imgs_per_instance < 8:
        idx = idx % self.imgs_per_instance  # [0, 7]
        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # the reference camera becomes the origin of the sample's frame
        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}_depth_mm.png')

        imgs.append(load_rgb(img_filename))

        # presumably millimetres on disk (per the "_depth_mm" file name), scaled by 1/1000 here
        depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0],
                                        [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # scale each pixel's ray by its depth value and keep the length of the result
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)
        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])

        for vid in range(8, 8 + 8 * 4):
            src_views_used.append(vid)
            img_filename = os.path.join(narrow_root, folder_id, uid,
                                        f'view_{(vid - 8) // 4}_{vid%4}_10.png')
            img = load_rgb(img_filename)
            imgs.append(img)

            # no depth for source views: constant placeholder depth and an all-ones mask
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
    else:
        # index of the reference view inside the two-stage set
        idx = idx % self.imgs_per_instance - 8

        c2w = self.c2ws[idx + 40]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(two_stage_root, folder_id, uid, f'view_0_{idx}_0.png')
        img = load_rgb(img_filename)
        imgs.append(img)

        # no depth map is loaded for this branch: depth is marked with -1
        depth_h = torch.full((img.shape[1], img.shape[2]), -1.0, dtype=torch.float32)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])

        for vid in range(40 + 8, 40 + 8 + 32):
            # first-stage view this source view belongs to (4 source views each)
            view_idx_to_use = (vid - 40 - 8) // 4

            src_views_used.append(vid)
            # FIX: the file name previously used the reference index `idx`,
            # so the 32 source poses were paired with only the 4 images
            # around the reference view; use the view's own group index so
            # each image matches self.all_extrinsics[vid].
            img_filename = os.path.join(two_stage_root, folder_id, uid,
                                        f'view_0_{view_idx_to_use}_{(vid-48) % 4 + 1}.png')
            img = load_rgb(img_filename)
            imgs.append(img)

            depth_h = torch.full((img.shape[1], img.shape[2]), -1.0, dtype=torch.float32)
            depths_h.append(depth_h)
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, _near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # near/far: camera distance to the origin -/+ 1, widened by 5 %
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1
        new_near_fars.append([0.95 * near, 1.05 * far])

        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics = np.stack(intrinsics)
    w2cs = np.stack(new_w2cs)
    c2ws = np.stack(new_c2ws)
    near_fars = np.stack(new_near_fars)

    # training keeps the reference view among the inputs; other splits drop it
    start_idx = 0 if self.split == 'train' else 1

    view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    if view_ids[0] < 8:
        meta_end = "_narrow" + "_refview" + str(view_ids[0])
    else:
        meta_end = "_two_stage" + "_refview" + str(view_ids[0] - 8)
    # NOTE: "_refview" appears twice in the resulting string; kept as-is
    # because the meta string may be used downstream as an identifier.
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

    # - image to render
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                'intrinsics', 'view_ids', 'affine_mats'):
        sample[key] = sample[key][start_idx:]

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_8_wide_from_2_stage.py
ADDED
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Rays pass through pixel centers (pixel index + 0.5) and are not
    normalized: the z component of every direction is exactly 1.

    Inputs:
        H, W: image height and width in pixels
        focal: focal length in pixels, either one number shared by both axes
            or a sequence (fx, fy)
        center: optional principal point (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3) float32 tensor, the direction of the rays in
            camera coordinate
    """
    # accept a bare scalar as well as the (fx, fy) pair used by the callers
    if np.ndim(focal) == 0:
        fx = fy = focal
    else:
        fx, fy = focal[0], focal[1]
    cent = center if center is not None else [W / 2, H / 2]

    # pixel-center coordinates: i runs along the width (x), j along the height (y)
    i = (torch.arange(W, dtype=torch.float32) + 0.5).expand(H, W)
    j = (torch.arange(H, dtype=torch.float32) + 0.5).unsqueeze(1).expand(H, W)

    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: path of a text file with one space-separated matrix row per
            line; only read when P is None. If the file has exactly 4 lines
            the first one is dropped before parsing.
        P: optional projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) array, identity with K (scaled so K[2, 2] == 1)
            in its top-left 3x3 block
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # context manager so the file handle is always closed
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # per the OpenCV docs R is the external (world-to-camera) rotation, so its
    # transpose (= inverse) is the rotation part of the cam2world pose
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4x1 vector; divide by its last entry to get 3D coordinates
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=[128, 128, 128], batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=[]):
    """
    Load the object split and the camera tables of the narrow and two-stage renders.

    Inputs:
        root_dir: stored on the instance; the data locations used here are
            hard-coded absolute paths
        split: 'train' selects the training objects, anything else the 'val' ones
        n_views, N_rays, batch_size, clean_image, importance_sample,
            test_ref_views: stored on the instance unchanged
        img_wh: only validated (both sides must be multiples of 32); the
            dataset always works at (256, 256)
        downSample: mask rescaling factor used by ``read_mask``
        split_filepath, pair_filepath: not used by this constructor
        vol_dims: voxel grid size, stored as a float tensor in ``self.voxel_dims``
    """
    self.root_dir = root_dir
    self.split = split

    self.imgs_per_instance = 8

    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # FIX: read_mask() reads self.downSample, which was never stored before
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path_narrow = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path_narrow, 'r') as f:
        narrow_meta = json.load(f)

    pose_json_path_two_stage = "/objaverse-processed/zero12345_img/zero12345_2stage_8_pose.json"
    with open(pose_json_path_two_stage, 'r') as f:
        two_stage_meta = json.load(f)

    # narrow poses come first, two-stage poses second
    self.img_ids = list(narrow_meta["c2ws"].keys()) + list(two_stage_meta["c2ws"].keys())  # (8 + 8*4) + (8 + 8*4)
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(narrow_meta["c2ws"].values()) + list(two_stage_meta["c2ws"].values()))
    intrinsic = np.eye(4)
    assert narrow_meta["intrinsics"] == two_stage_meta["intrinsics"], "intrinsics not equal"
    intrinsic[:3, :3] = np.array(narrow_meta["intrinsics"])
    self.intrinsic = intrinsic
    assert narrow_meta["near_far"] == two_stage_meta["near_far"], "near_far not equal"
    self.near_far = np.array(narrow_meta["near_far"])
    self.near_far[1] = 1.8  # override the far bound from the pose file
    self.define_transforms()
    # flips the y and z camera axes
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    for idx, img_id in enumerate(self.img_ids):
        pose = self.input_poses[idx]
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        # every view shares the same near/far array object
        self.near_fars.append(self.near_far)

    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
149 |
+
|
150 |
+
|
151 |
+
def define_transforms(self):
    """Create the image preprocessing pipeline and store it on ``self.transform``.

    The pipeline consists of a single ``ToTensor`` step.
    """
    steps = [T.ToTensor()]
    self.transform = T.Compose(steps)
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
def load_cam_info(self):
    """Fill the per-view camera tables used when picking source views.

    For every entry of ``self.img_ids`` this appends the shared intrinsic
    matrix, the inverse of that view's camera-to-world pose (world-to-camera
    extrinsic) and the shared near/far range to ``self.all_intrinsics``,
    ``self.all_extrinsics`` and ``self.all_near_fars`` respectively.
    """
    for view_index in range(len(self.img_ids)):
        world_to_cam = np.linalg.inv(self.c2ws[view_index])
        self.all_intrinsics.append(self.intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(self.near_far)
|
162 |
+
|
163 |
+
def read_depth(self, filename):
    """Placeholder depth reader; does nothing and returns None.

    NOTE(review): a second ``read_depth`` with a different signature is
    defined later in this class and replaces this one at class creation.
    """
    return None
|
165 |
+
|
166 |
+
def read_mask(self, filename):
    """
    Load a grayscale mask and return it binarized at two resolutions.

    Inputs:
        filename: path of the mask image
    Outputs:
        mask: ``mask_h`` shrunk by a further factor of 4, values in {0, 1}
        mask_h: the mask rescaled by the down-sampling factor, values in {0, 1}
    Raises:
        FileNotFoundError: if the image cannot be read
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread reports a missing/unreadable file by returning None
        raise FileNotFoundError(f'cannot read mask image: {filename}')

    # downSample may not have been stored on the instance; fall back to the
    # constructor default of 1.0 instead of raising AttributeError
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
177 |
+
|
178 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """Build the matrix that maps the normalized volume onto the scene bounds.

    Inputs:
        img_hw: image size as [height, width]
        intrinsics, extrinsics, near_fars: per-view camera data, forwarded
            unchanged to ``get_boundingbox``
        factor: multiplier applied to the bounding radius
    Outputs:
        scale_mat: (4, 4) float32 matrix with the scaled radius on the first
            three diagonal entries and the bounding-box center as translation
        scale_factor: reciprocal of the scaled radius
    """
    # center and radius expose .cpu() -- presumably torch tensors; verify against get_boundingbox
    center, raw_radius, _bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    scaled_radius = raw_radius * factor

    normalization = np.diag([scaled_radius] * 3 + [1.0])
    normalization[:3, 3] = center.cpu().numpy()

    inverse_radius = 1. / scaled_radius.cpu().numpy()
    return normalization.astype(np.float32), inverse_radius
|
188 |
+
|
189 |
+
def __len__(self):
|
190 |
+
return self.imgs_per_instance * len(self.lvis_paths)
|
191 |
+
|
192 |
+
|
193 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """Placeholder depth reader; ignores its arguments and returns None.

    NOTE(review): an earlier ``read_depth(self, filename)`` exists in this
    class; this later definition is the one that stays bound to the name.
    """
    return None
|
195 |
+
|
196 |
+
|
197 |
+
def __getitem__(self, idx):
    """
    Assemble one sample: a reference (query) view followed by 8 source views.

    ``idx // self.imgs_per_instance`` selects the object and
    ``idx % self.imgs_per_instance`` the reference view. The reference image
    and its depth map come from the "narrow" renders; the 8 source images
    are ``view_0_{k}_0.png`` (k = 0..7) of the "two-stage" renders and carry
    no depth (filled with -1).

    Every pose is expressed relative to the reference camera and then
    re-derived after applying the scale matrix from ``cal_scale_mat``.

    Inputs:
        idx: flat sample index
    Outputs:
        sample: dict with the stacked per-view tensors (``images``,
            ``depths_h``, ``masks_h``, ``w2cs``, ``c2ws``, ``near_fars``,
            ``intrinsics``, ``view_ids``, ``affine_mats``), the ``query_*``
            entries of the reference view, ``scale_mat``, ``trans_mat``,
            bookkeeping fields and the generated ``rays``. For splits other
            than ``'train'`` the reference view is dropped from the stacked
            per-view tensors (but kept in the ``query_*`` entries).
    """
    narrow_root = "/objaverse-processed/zero12345_img/zero12345_narrow/"
    two_stage_root = "/objaverse-processed/zero12345_img/zero12345_2stage_8/"

    def load_rgb(path):
        # Load an image as a (C, h, w) tensor; 4-channel images are
        # alpha-composited onto a white background.
        img = self.transform(Image.open(path))
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    sample = {}
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

    folder_uid_dict = self.lvis_paths[idx // self.imgs_per_instance]
    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']

    idx = idx % self.imgs_per_instance  # [0, 7]
    # target view
    c2w = self.c2ws[idx]
    w2c = np.linalg.inv(c2w)
    w2c_ref = w2c
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    # the reference camera becomes the origin of the sample's frame
    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    img_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}.png')
    depth_filename = os.path.join(narrow_root, folder_id, uid, f'view_{idx}_depth_mm.png')

    imgs.append(load_rgb(img_filename))

    # presumably millimetres on disk (per the "_depth_mm" file name), scaled by 1/1000 here
    depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
    mask_h = depth_h > 0
    directions = get_ray_directions(self.img_wh[1], self.img_wh[0],
                                    [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
    # ray directions have z == 1, so direction * depth is the surface point at
    # that z-depth and its norm is the distance along the ray
    surface_points = directions * depth_h[..., None]  # [H, W, 3]
    depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

    depths_h.append(depth_h)
    masks_h.append(mask_h)

    intrinsics.append(self.intrinsic)
    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None

    img_wh = self.img_wh
    src_views_used = []
    # NOTE(review): images come from the two-stage folder while the poses are
    # entries 0..7 of the pose table (the narrow block) -- confirm they match.
    for vid in range(0, 8):
        src_views_used.append(vid)
        img_filename = os.path.join(two_stage_root, folder_id, uid, f'view_0_{vid}_0.png')
        img = load_rgb(img_filename)
        imgs.append(img)

        # no depth for source views: depth is marked with -1, mask is all ones
        depth_h = torch.full((img.shape[1], img.shape[2]), -1.0, dtype=torch.float32)
        depths_h.append(depth_h)
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])
        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, _near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # near/far: camera distance to the origin -/+ 1, widened by 5 %
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1
        new_near_fars.append([0.95 * near, 1.05 * far])

        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics = np.stack(intrinsics)
    w2cs = np.stack(new_w2cs)
    c2ws = np.stack(new_c2ws)
    near_fars = np.stack(new_near_fars)

    # training keeps the reference view among the inputs; other splits drop it
    start_idx = 0 if self.split == 'train' else 1

    view_ids = [origin_idx % self.imgs_per_instance] + src_views_used
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    # FIX: view_ids[0] is already in [0, 7] here; the former "- 8" offset
    # produced negative reference-view ids in the meta string.
    meta_end = "_two_stage" + "_refview" + str(view_ids[0])
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + meta_end

    # - image to render
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                'intrinsics', 'view_ids', 'affine_mats'):
        sample[key] = sample[key][start_idx:]

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_4_1_eval_new_data.py
ADDED
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Rays pass through pixel centers (pixel index + 0.5) and are not
    normalized: the z component of every direction is exactly 1.

    Inputs:
        H, W: image height and width in pixels
        focal: focal length in pixels, either one number shared by both axes
            or a sequence (fx, fy)
        center: optional principal point (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3) float32 tensor, the direction of the rays in
            camera coordinate
    """
    # accept a bare scalar as well as the (fx, fy) pair used by the callers
    if np.ndim(focal) == 0:
        fx = fy = focal
    else:
        fx, fy = focal[0], focal[1]
    cent = center if center is not None else [W / 2, H / 2]

    # pixel-center coordinates: i runs along the width (x), j along the height (y)
    i = (torch.arange(W, dtype=torch.float32) + 0.5).expand(H, W)
    j = (torch.arange(H, dtype=torch.float32) + 0.5).unsqueeze(1).expand(H, W)

    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: text file holding the projection matrix, one row of four space-separated
                  numbers per line (a four-line file has its first line skipped);
                  only read when P is None
        P: optional projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) matrix whose upper-left 3x3 is K normalised so that K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager so the file handle is closed even if parsing fails.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation; its transpose is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is the homogeneous camera position; divide by its last component to de-homogenise.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
    """Per-view dataset: one stage-1 reference image plus its four stage-2 source images.

    Every entry of ``<root_dir>/<specific_dataset_name>`` is treated as one shape folder and
    contributes 8 samples (one per reference view).  Poses, intrinsics and the near/far range
    are read from the shape's ``pose.json`` each time an item is fetched, and all poses of a
    sample are expressed relative to the reference camera.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[],
                 specific_dataset_name='GSO'
                 ):
        """Index the shape folders below ``root_dir/specific_dataset_name``.

        ``split_filepath``, ``pair_filepath`` are accepted for interface compatibility and
        are not used by this class.
        """
        import warnings

        self.root_dir = root_dir
        self.split = split
        self.specific_dataset_name = specific_dataset_name
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        self.downSample = downSample  # read by read_mask()

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # The original check (`split == 'val' or 'export_mesh'`) was always true, so no
        # split was ever rejected.  Keep accepting every split, but make the mismatch visible.
        if self.split not in ('val', 'export_mesh'):
            warnings.warn(
                f"BlenderPerView only supports split 'val' or 'export_mesh', got {self.split!r}")

        # find all subfolders
        main_folder = os.path.join(root_dir, self.specific_dataset_name)
        self.shape_list = sorted(os.listdir(main_folder))
        self.lvis_paths = [os.path.join(main_folder, shape_name) for shape_name in self.shape_list]

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the image-to-tensor transform used when loading images."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far for every view id."""
        for vid, img_id in enumerate(self.img_ids):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return (quarter-resolution mask, full mask), both binarised."""
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the scale matrix from the views' bounding volume.

        Returns (scale_mat, scale_factor): scale_mat is a float32 4x4 matrix with
        ``radius * factor`` on the diagonal and the bounding-box centre as translation;
        scale_factor is the reciprocal of that enlarged radius.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Each shape folder yields 8 samples, one per reference view."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder: depth maps are not loaded by this dataset."""
        pass

    def _load_scene_cameras(self, folder_path):
        """Read ``pose.json`` of one shape and rebuild the per-view camera state on ``self``."""
        with open(os.path.join(folder_path, "pose.json"), 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # far bound from the file is overridden with a fixed value
        self.define_transforms()
        # Flips the y and z camera axes (Blender convention -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        c2ws, w2cs = [], []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            c2ws.append(c2w)
            w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(c2ws, axis=0)
        self.w2cs = np.stack(w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

    def _load_rgb(self, img_filename):
        """Load an image through ``self.transform`` and composite any alpha channel onto white."""
        img = self.transform(Image.open(img_filename))  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    @staticmethod
    def _rescale_pose(intrinsic, extrinsic, scale_mat):
        """Fold ``scale_mat`` into a world-to-camera matrix and return the new (c2w, w2c)."""
        P = (intrinsic @ extrinsic @ scale_mat)[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        return c2w, np.linalg.inv(c2w)

    def __getitem__(self, idx):
        """Assemble the sample for flat index ``idx`` (shape = idx // 8, reference view = idx % 8)."""
        sample = {}
        origin_idx = idx

        folder_path = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]

        # last subdir name
        shape_name = os.path.split(folder_path)[-1]

        self._load_scene_cameras(folder_path)
        # Bound up front so it is defined regardless of how many source views are loaded.
        img_wh = self.img_wh

        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj-mats between views

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref_inv = np.linalg.inv(w2c)

        # Reference camera expressed in its own frame.
        w2cs.append(w2c @ w2c_ref_inv)

        img = self._load_rgb(os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}'))
        imgs.append(img)

        # No depth is available for the reference view: fill with -1, mask everything in.
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # The four source views belonging to reference view `idx`.
        src_views = range(8 + idx * 4, 8 + (idx + 1) * 4)
        for vid in src_views:
            img = self._load_rgb(os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}'))
            imgs.append(img)
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            c2w, w2c = self._rescale_pose(intrinsic, extrinsic, scale_mat)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)

            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far are set to the camera's distance from the origin -/+ 1, with a 5% margin.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # Outside training the reference view is dropped from the per-view source tensors.
        start_idx = 0 if self.split == 'train' else 1

        # Rescaled poses of the 8 stage-1 views, relative to the reference camera.
        new_target_w2cs = []
        for i_idx in range(8):
            _, w2c = self._rescale_pose(
                self.all_intrinsics[i_idx], self.all_extrinsics[i_idx] @ w2c_ref_inv, scale_mat)
            new_target_w2cs.append(w2c)
        target_w2cs = np.stack(new_target_w2cs)

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = shape_name

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws', 'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_6.py
ADDED
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width in pixels
        focal: focal length, either a pair (fx, fy) or a single scalar used for both axes
        center: optional principal point (cx, cy); defaults to the image centre (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
                    (not normalised: the last component is always 1)
    """
    # Drop the leading batch axis of the pixel grid and shift every sample by half a
    # pixel so that rays pass through pixel centres.
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # HxWx2
    i, j = grid.unbind(-1)

    # A bare scalar focal length is applied to both image axes.
    if np.ndim(focal) == 0:
        fx = fy = focal
    else:
        fx, fy = focal[0], focal[1]

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: text file holding the projection matrix, one row of four space-separated
                  numbers per line (a four-line file has its first line skipped);
                  only read when P is None
        P: optional projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) matrix whose upper-left 3x3 is K normalised so that K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager so the file handle is closed even if parsing fails.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation; its transpose is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is the homogeneous camera position; divide by its last component to de-homogenise.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Per-view dataset: one reference image with depth plus 24 narrow-baseline source images.

    Shapes are listed in a split JSON (entries with ``folder_id`` and ``uid``); all shapes
    share one pose JSON.  Training uses 6 reference views per shape (0, 1, 2, 3, 5, 7), every
    other split uses 8.  All poses of a sample are expressed relative to the reference camera.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[],
                 lvis_json_path='/objaverse-processed/zero12345_img/lvis_split.json',
                 pose_json_path="/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"):
        """Load the shape list and the shared camera poses.

        ``lvis_json_path`` (shape list with 'train' / 'val' keys) and ``pose_json_path``
        (shared poses, intrinsics, near/far) default to the previously hard-coded locations.
        ``split_filepath`` and ``pair_filepath`` are accepted but not used by this class.
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        self.downSample = downSample  # read by read_mask()

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)  # folder_id and uid
        # Any split other than 'train' falls back to the validation list.
        self.lvis_paths = lvis_paths['train'] if self.split == 'train' else lvis_paths['val']

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # far bound from the file is overridden with a fixed value
        self.define_transforms()
        # Flips the y and z camera axes (Blender convention -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        c2ws, w2cs = [], []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            c2ws.append(c2w)
            w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(c2ws, axis=0)
        self.w2cs = np.stack(w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the image-to-tensor transform used when loading images."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far for every view id."""
        for vid, img_id in enumerate(self.img_ids):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return (quarter-resolution mask, full mask), both binarised."""
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the scale matrix from the views' bounding volume.

        Returns (scale_mat, scale_factor): scale_mat is a float32 4x4 matrix with
        ``radius * factor`` on the diagonal and the bounding-box centre as translation;
        scale_factor is the reciprocal of that enlarged radius.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """6 reference views per shape when training, 8 otherwise."""
        views_per_shape = 6 if self.split == 'train' else 8
        return views_per_shape * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder: depth is read inline in __getitem__, not through this method."""
        pass

    @staticmethod
    def _used_src_views():
        """Return the source-view indices that are actually loaded.

        Source views occupy indices 8..39 in groups of four; groups 4 and 6 are skipped.
        """
        return [vid for vid in range(8, 8 + 8 * 4) if (vid - 8) // 4 not in (4, 6)]

    def _load_rgb(self, img_filename):
        """Load an image through ``self.transform`` and composite any alpha channel onto white."""
        img = self.transform(Image.open(img_filename))  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """Assemble the sample for flat index ``idx`` (shape index and reference view)."""
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views
        img_wh = self.img_wh

        if self.split == 'train':
            folder_uid_dict = self.lvis_paths[idx // 6]
            # Training reference views are 0, 1, 2, 3, 5, 7 (4 and 6 are never used).
            idx = (0, 1, 2, 3, 5, 7)[idx % 6]
        else:
            folder_uid_dict = self.lvis_paths[idx // 8]
            idx = idx % 8  # [0, 7]

        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        shape_dir = os.path.join(self.root_dir, folder_id, uid)

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref_inv = np.linalg.inv(w2c)

        # Reference camera expressed in its own frame.
        w2cs.append(w2c @ w2c_ref_inv)

        imgs.append(self._load_rgb(os.path.join(shape_dir, f'view_{idx}.png')))

        # The depth PNG is divided by 1000 (file name suggests millimetres -- TODO confirm);
        # zero marks pixels without depth.
        depth_filename = os.path.join(shape_dir, f'view_{idx}_depth_mm.png')
        depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        # Scale each (z = 1) ray direction by the depth and take the norm, i.e. turn
        # z-depth into distance along the ray.
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        src_views = self._used_src_views()
        for vid in src_views:
            img = self._load_rgb(os.path.join(shape_dir, f'view_{(vid - 8) // 4}_{vid%4}_10.png'))
            imgs.append(img)
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = (intrinsic @ extrinsic @ scale_mat)[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)

            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far are set to the camera's distance from the origin -/+ 1, with a 5% margin.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # Outside training the reference view is dropped from the per-view source tensors.
        start_idx = 0 if self.split == 'train' else 1

        # Only the views that were actually loaded, so view_ids lines up with images / w2cs.
        view_ids = [idx] + src_views
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws', 'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_8_3_fixed.py
ADDED
@@ -0,0 +1,393 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute the viewing direction of every pixel, expressed in the camera frame.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: indexable pair, focal[0] scales the x axis and focal[1] the y axis
        center: optional indexable pair (cx, cy); when omitted the image
                centre (W / 2, H / 2) is used
    Outputs:
        directions: (H, W, 3) tensor whose last component is always 1
                    (the rays are NOT normalised to unit length)
    """
    # Pixel coordinates are shifted by 0.5 so they address pixel centres.
    # After dropping the leading batch axis with [0] the grid is HxWx2.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    u, v = pixel_grid.unbind(-1)

    principal = [W / 2, H / 2] if center is None else center
    x_cam = (u - principal[0]) / focal[0]
    y_cam = (v - principal[1]) / focal[1]

    return torch.stack([x_cam, y_cam, torch.ones_like(u)], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Split a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix, one row
                  per line with space-separated values; only read when P is
                  None. If the file has exactly 4 lines the first one is
                  skipped.
        P: optional (3, 4) projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) float64 matrix with K (normalised so K[2, 2] == 1)
                    in its upper-left 3x3 block
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[tok[0], tok[1], tok[2], tok[3]] for tok in (line.split(" ") for line in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    # OpenCV returns (cameraMatrix, rotMatrix, transVect, ...); transVect is a
    # homogeneous 4x1 vector.
    K, R, t = cv2.decomposeProjectionMatrix(P)[:3]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation, so its transpose (== inverse for a
    # rotation) is the rotation part of the cam2world matrix.
    pose[:3, :3] = R.transpose()
    # De-homogenise the 4x1 vector to obtain the camera position in world space.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-reference-view dataset over pre-rendered objects.

    Every entry of the LVIS split contributes 8 samples, one for each
    reference image ``view_{0..7}.png``. A sample bundles that reference view
    (with its depth map) and 24 source views ``view_{i}_{j}_10.png`` taken
    from all 8 groups with the fixed local indices ``j in (0, 2, 3)``.
    All camera poses are re-expressed relative to the reference camera and
    then rescaled with the matrix returned by ``cal_scale_mat``.

    NOTE(review): the two JSON metadata files are read from hard-coded
    absolute paths, and ``self.img_wh`` is fixed to (256, 256) regardless of
    the ``img_wh`` argument (which is only validated) -- confirm this is
    intended.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Load the split list and the shared camera metadata.

        Inputs:
            root_dir: directory containing ``<folder_id>/<uid>/view_*.png``
            split: 'train' selects the train list, anything else the val list;
                   strings containing 'val' or 'test' also switch
                   ``__getitem__`` to full-image ray generation
            img_wh: only checked to be multiples of 32
            downSample: resize factor used by ``read_mask``
            N_rays: number of random rays drawn per training sample
            vol_dims: voxel grid resolution stored in ``self.voxel_dims``
            clean_image: when True the query mask is forwarded to the ray
                         generators
            importance_sample: forwarded to the random ray generator
            test_ref_views: stored (as a copy) for testing code
            n_views, batch_size: stored on the instance, not used in this class
            split_filepath, pair_filepath: accepted but unused here
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() reads this attribute; it was previously never stored,
        # which made read_mask() fail with AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Copy so instances never share (and mutate) the default list object.
        self.test_ref_views = None if test_ref_views is None else list(test_ref_views)  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # overrides the far bound stored in the json
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> tensor transform used for every image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append one (intrinsic, world-to-camera, near/far) triple per view id."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """
        Read a grayscale mask and binarise it at two resolutions.

        Outputs:
            mask: the ``downSample``-resized mask further reduced by 0.25
            mask_h: the ``downSample``-resized mask
        Both use nearest-neighbour resizing and have every non-zero pixel
        set to 1.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix mapping the normalised volume to the scene bounds.

        ``get_boundingbox`` supplies a center and a radius (both are converted
        with ``.cpu().numpy()``, so presumably torch tensors -- confirm). The
        radius is enlarged by ``factor``.

        Outputs:
            scale_mat: (4, 4) float32, diag(radius) with the center as translation
            scale_factor: 1 / radius, used to rescale depth values
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Eight reference views per object in the split."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder kept for interface parity; depth is read inline in ``__getitem__``."""
        pass

    def _load_rgb(self, img_filename):
        """Load an image as a (C, H, W) tensor, compositing a 4th (alpha) channel onto white."""
        # Context manager so the underlying file handle is released promptly.
        with Image.open(img_filename) as pil_img:
            img = self.transform(pil_img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """
        Assemble one sample: reference view ``idx % 8`` of object ``idx // 8``
        plus its 24 source views, with cameras, depths, masks and rays.

        Raises:
            FileNotFoundError: if the reference depth map cannot be read.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        img_wh = self.img_wh

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # Every extrinsic is expressed relative to the reference camera.
        w2cs.append(w2c @ w2c_ref_inv)

        img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))

        imgs += [self._load_rgb(img_filename)]

        depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_h is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f"cannot read depth map: {depth_filename}")
        depth_h = depth_h.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Convert z-depth into distance along the ray (directions have z == 1).
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        depth_h = distance

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # Three fixed source views (local indices 0, 2, 3) from each of the 8 groups;
        # global ids 0-7 are the reference views, groups start at id 8.
        src_views = [8 + i * 4 + local_idx for i in range(8) for local_idx in (0, 2, 3)]

        for vid in src_views:
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

            img = self._load_rgb(img_filename)

            imgs += [img]
            # Source views carry no depth supervision: constant placeholders.
            depth_h = np.ones(img.shape[1:], dtype=np.float32)
            depths_h.append(depth_h)
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):

            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far straddle the origin by +-1 along the camera distance,
            # then get a 5% safety margin.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # During training the reference view stays among the inputs;
        # otherwise it is held out and only used as the query.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_8_3_random.py
ADDED
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute the viewing direction of every pixel, expressed in the camera frame.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width in pixels
        focal: indexable pair, focal[0] scales the x axis and focal[1] the y axis
        center: optional indexable pair (cx, cy); when omitted the image
                centre (W / 2, H / 2) is used
    Outputs:
        directions: (H, W, 3) tensor whose last component is always 1
                    (the rays are NOT normalised to unit length)
    """
    # Pixel coordinates are shifted by 0.5 so they address pixel centres.
    # After dropping the leading batch axis with [0] the grid is HxWx2.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    u, v = pixel_grid.unbind(-1)

    principal = [W / 2, H / 2] if center is None else center
    x_cam = (u - principal[0]) / focal[0]
    y_cam = (v - principal[1]) / focal[1]

    return torch.stack([x_cam, y_cam, torch.ones_like(u)], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Split a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix, one row
                  per line with space-separated values; only read when P is
                  None. If the file has exactly 4 lines the first one is
                  skipped.
        P: optional (3, 4) projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) float64 matrix with K (normalised so K[2, 2] == 1)
                    in its upper-left 3x3 block
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[tok[0], tok[1], tok[2], tok[3]] for tok in (line.split(" ") for line in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    # OpenCV returns (cameraMatrix, rotMatrix, transVect, ...); transVect is a
    # homogeneous 4x1 vector.
    K, R, t = cv2.decomposeProjectionMatrix(P)[:3]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation, so its transpose (== inverse for a
    # rotation) is the rotation part of the cam2world matrix.
    pose[:3, :3] = R.transpose()
    # De-homogenise the 4x1 vector to obtain the camera position in world space.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-reference-view dataset over pre-rendered objects.

    Every entry of the LVIS split contributes 8 samples, one for each
    reference image ``view_{0..7}.png``. A sample bundles that reference view
    (with its depth map) and 24 source views ``view_{i}_{j}_10.png``: three
    out of four local indices ``j`` from each of the 8 groups, drawn at random
    for the 'train' split and fixed to ``(0, 2, 3)`` otherwise.
    All camera poses are re-expressed relative to the reference camera and
    then rescaled with the matrix returned by ``cal_scale_mat``.

    NOTE(review): the two JSON metadata files are read from hard-coded
    absolute paths, and ``self.img_wh`` is fixed to (256, 256) regardless of
    the ``img_wh`` argument (which is only validated) -- confirm this is
    intended.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Load the split list and the shared camera metadata.

        Inputs:
            root_dir: directory containing ``<folder_id>/<uid>/view_*.png``
            split: 'train' selects the train list, anything else the val list;
                   strings containing 'val' or 'test' also switch
                   ``__getitem__`` to full-image ray generation
            img_wh: only checked to be multiples of 32
            downSample: resize factor used by ``read_mask``
            N_rays: number of random rays drawn per training sample
            vol_dims: voxel grid resolution stored in ``self.voxel_dims``
            clean_image: when True the query mask is forwarded to the ray
                         generators
            importance_sample: forwarded to the random ray generator
            test_ref_views: stored (as a copy) for testing code
            n_views, batch_size: stored on the instance, not used in this class
            split_filepath, pair_filepath: accepted but unused here
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() reads this attribute; it was previously never stored,
        # which made read_mask() fail with AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Copy so instances never share (and mutate) the default list object.
        self.test_ref_views = None if test_ref_views is None else list(test_ref_views)  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # overrides the far bound stored in the json
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> tensor transform used for every image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append one (intrinsic, world-to-camera, near/far) triple per view id."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """
        Read a grayscale mask and binarise it at two resolutions.

        Outputs:
            mask: the ``downSample``-resized mask further reduced by 0.25
            mask_h: the ``downSample``-resized mask
        Both use nearest-neighbour resizing and have every non-zero pixel
        set to 1.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix mapping the normalised volume to the scene bounds.

        ``get_boundingbox`` supplies a center and a radius (both are converted
        with ``.cpu().numpy()``, so presumably torch tensors -- confirm). The
        radius is enlarged by ``factor``.

        Outputs:
            scale_mat: (4, 4) float32, diag(radius) with the center as translation
            scale_factor: 1 / radius, used to rescale depth values
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Eight reference views per object in the split."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder kept for interface parity; depth is read inline in ``__getitem__``."""
        pass

    def _load_rgb(self, img_filename):
        """Load an image as a (C, H, W) tensor, compositing a 4th (alpha) channel onto white."""
        # Context manager so the underlying file handle is released promptly.
        with Image.open(img_filename) as pil_img:
            img = self.transform(pil_img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """
        Assemble one sample: reference view ``idx % 8`` of object ``idx // 8``
        plus its 24 source views, with cameras, depths, masks and rays.

        Raises:
            FileNotFoundError: if the reference depth map cannot be read.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        img_wh = self.img_wh

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # Every extrinsic is expressed relative to the reference camera.
        w2cs.append(w2c @ w2c_ref_inv)

        img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))

        imgs += [self._load_rgb(img_filename)]

        depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_h is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f"cannot read depth map: {depth_filename}")
        depth_h = depth_h.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Convert z-depth into distance along the ray (directions have z == 1).
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        depth_h = distance

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # Three source views from each of the 8 groups; global ids 0-7 are the
        # reference views, groups start at id 8. One RNG draw per group, in
        # group order, when training; a fixed triple otherwise.
        src_views = list()
        for i in range(8):
            if self.split == 'train':
                local_idxs = np.random.choice(4, 3, replace=False)
            else:
                local_idxs = [0, 2, 3]
            src_views += [8 + i * 4 + local_idx for local_idx in local_idxs]

        for vid in src_views:
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

            img = self._load_rgb(img_filename)

            imgs += [img]
            # Source views carry no depth supervision: constant placeholders.
            depth_h = np.ones(img.shape[1:], dtype=np.float32)
            depths_h.append(depth_h)
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):

            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far straddle the origin by +-1 along the camera distance,
            # then get a 5% safety margin.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # During training the reference view stays among the inputs;
        # otherwise it is held out and only used as the query.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_8_4_random_shading.py
ADDED
@@ -0,0 +1,432 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions of a pinhole camera, in camera coordinates.

    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); when None, [W / 2, H / 2] is used
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate;
                    the last component is always 1, i.e. the rays are NOT unit length
    """
    # Pixel coordinates shifted by +0.5, so every ray goes through the pixel centre.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    xs, ys = pixel_grid.unbind(-1)

    if center is not None:
        principal_point = center
    else:
        principal_point = [W / 2, H / 2]

    x_dir = (xs - principal_point[0]) / focal[0]
    y_dir = (ys - principal_point[1]) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: text file holding the projection matrix, one row per line with
                  values separated by single spaces; only read when P is None.
                  If the file has exactly 4 lines, the first one is skipped.
        P: optional projection matrix; when given, `filename` is ignored
    Outputs:
        intrinsics: (4, 4) array whose top-left 3x3 block is K normalised so K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Read through a context manager so the file handle is always released.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # decomposeProjectionMatrix returns the world-to-camera rotation; its transpose
    # (= inverse for a rotation) is the camera-to-world rotation we want here.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4-vector; de-homogenise it to get the translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-view dataset: one reference view plus 32 fixed source views per item.

    Every object listed in the LVIS split file contributes 8 items, one for each
    `view_{k}.png` (k in [0, 7]). The source views are always the 32 images
    `view_{i}_{j}_10.png` (i in [0, 7], j in [0, 3]). All cameras are expressed
    relative to the reference camera and then normalised by a per-item scale matrix.

    NOTE(review): view indices are used positionally against the pose json, i.e.
    entries 0-7 are assumed to be the reference views and 8-39 the source views
    -- confirm against the json layout.
    """

    # Metadata locations; class attributes so a subclass can point elsewhere.
    LVIS_SPLIT_JSON = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    NARROW_POSE_JSON = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=(128, 128, 128), batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=None):
        """
        Inputs:
            root_dir: folder containing `<folder_id>/<uid>/view_*.png`
            split: 'train' selects the train list, anything else the val list
            n_views, batch_size: stored only
            img_wh: only validated (multiples of 32); the working size is fixed to (256, 256)
            downSample: resize factor used by read_mask()
            split_filepath, pair_filepath: accepted for interface compatibility, unused
            N_rays: number of random rays sampled per item outside val/test
            vol_dims: cost-volume resolution, stored as a float tensor
            clean_image: pass the reference mask to the ray generators
            importance_sample: forwarded to the random ray generator
            test_ref_views: stored only; defaults to a fresh empty list
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        self.downSample = downSample  # read_mask() relies on this attribute

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # A fresh list per instance; a shared `[]` default would leak between datasets.
        self.test_ref_views = [] if test_ref_views is None else test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        with open(self.LVIS_SPLIT_JSON, 'r') as f:
            lvis_paths = json.load(f)
        self.lvis_paths = lvis_paths['train'] if self.split == 'train' else lvis_paths['val']

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        with open(self.NARROW_POSE_JSON, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)  # fixed regardless of the img_wh argument
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # far bound from the json is overridden
        self.define_transforms()
        # Flips the y and z axes of the camera frame (Blender -> OpenCV convention).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        c2ws = [pose @ self.blender2opencv for pose in self.input_poses]
        self.c2ws = np.stack(c2ws, axis=0)
        self.w2cs = np.stack([np.linalg.inv(c2w) for c2w in c2ws], axis=0)
        self.near_fars = [self.near_far for _ in self.img_ids]

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Install the PIL -> tensor conversion applied to every loaded image."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append, per pose, the shared intrinsic, the world-to-camera matrix and the shared near/far."""
        for vid, _ in enumerate(self.img_ids):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """
        Load a mask image and binarise it at two resolutions.

        Outputs:
            mask: mask_h downscaled by 0.25, values in {0, 1}
            mask_h: mask resized by self.downSample, values in {0, 1}
        """
        mask_h = cv2.imread(filename, 0)
        if mask_h is None:
            raise OSError(f'cv2.imread could not read mask: {filename}')
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Estimate the matrix mapping normalised coordinates to the cameras' bounding sphere.

        Outputs:
            scale_mat: (4, 4) float32, diag(radius * factor) with the sphere centre as translation
            scale_factor: 1 / (radius * factor), used to rescale depths
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Eight items (reference views) per object."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder; depth maps are loaded directly inside __getitem__."""
        pass

    def _load_rgb(self, filename):
        """Load an image as a tensor; RGBA inputs are composited onto a white background."""
        with Image.open(filename) as pil_img:
            img = self.transform(pil_img)  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def _load_reference_depth(self, filename):
        """
        Load a `*_depth_mm.png` map and convert it to per-pixel distance along the ray.

        Outputs:
            distance: (H, W) length of `ray direction * depth` for every pixel
            mask_h: (H, W) bool, True where the stored depth is positive
        """
        raw = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
        if raw is None:
            raise OSError(f'cv2.imread could not read depth map: {filename}')
        depth_h = raw.astype(np.uint16) / 1000.0  # presumably millimetres -> scene units
        mask_h = depth_h > 0
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0],
                                        [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        return distance, mask_h

    def _normalize_camera(self, intrinsic, extrinsic, scale_mat):
        """Re-express one camera in the scale_mat-normalised space; returns (w2c, c2w, affine_mat, near_far)."""
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]

        # Near/far bracket a unit-radius region around the origin, with a 5% margin.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1
        return w2c, c2w, affine_mat, [0.95 * near, 1.05 * far]

    def __getitem__(self, idx):
        """
        Assemble one item: reference view `idx % 8` of object `idx // 8` plus 32 source views.

        In the 'train' split the image/camera/depth in slot 0 are replaced by a randomly
        chosen one of the 8 reference views, and slot 0 stays in the returned view stacks;
        in every other split slot 0 is only exposed through the `query_*` entries.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        obj_dir = os.path.join(self.root_dir, folder_id, uid)

        # target view; every camera below is expressed relative to it
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)

        imgs.append(self._load_rgb(os.path.join(obj_dir, f'view_{idx}.png')))
        depth_h, mask_h = self._load_reference_depth(os.path.join(obj_dir, f'view_{idx}_depth_mm.png'))
        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # src_views = range(8+idx*4, 8+(idx+1)*4)
        src_views = range(8, 8 + 8 * 4)
        img_wh = self.img_wh

        for vid in src_views:
            img = self._load_rgb(os.path.join(obj_dir, f'view_{(vid - 8) // 4}_{vid % 4}_10.png'))
            imgs.append(img)
            # Source views carry no ground-truth depth: fill with ones / a full mask.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            w2c, c2w, affine_mat, near_far = self._normalize_camera(intrinsic, extrinsic, scale_mat)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            new_affine_mats.append(affine_mat)
            new_near_fars.append(near_far)
            new_depths_h.append(depth * scale_factor)

        if self.split == 'train':
            # randomly select one view from eight views as reference view
            # NOTE(review): view_ids[0] and sample['meta'] below still report `idx`,
            # not idx_to_select -- confirm this is intended.
            idx_to_select = np.random.randint(0, 8)

            imgs[0] = self._load_rgb(os.path.join(obj_dir, f'view_{idx_to_select}.png'))

            w2c_selected = self.all_extrinsics[idx_to_select] @ w2c_ref_inv
            w2c, c2w, affine_mat, near_far = self._normalize_camera(
                self.all_intrinsics[idx_to_select], w2c_selected, scale_mat)
            new_affine_mats[0] = affine_mat
            new_near_fars[0] = near_far
            new_w2cs[0] = w2c
            new_c2ws[0] = c2w

            distance, mask_h = self._load_reference_depth(
                os.path.join(obj_dir, f'view_{idx_to_select}_depth_mm.png'))
            new_depths_h[0] = distance * scale_factor
            masks_h[0] = mask_h

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # Outside training the reference view (slot 0) is dropped from the view stacks.
        start_idx = 0 if self.split == 'train' else 1

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        # sample['light_idx'] = torch.tensor(light_idx)
        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all.py
ADDED
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions of a pinhole camera, in camera coordinates.

    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); when None, [W / 2, H / 2] is used
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate;
                    the last component is always 1, i.e. the rays are NOT unit length
    """
    # Pixel coordinates shifted by +0.5, so every ray goes through the pixel centre.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    xs, ys = pixel_grid.unbind(-1)

    if center is not None:
        principal_point = center
    else:
        principal_point = [W / 2, H / 2]

    x_dir = (xs - principal_point[0]) / focal[0]
    y_dir = (ys - principal_point[1]) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: text file holding the projection matrix, one row per line with
                  values separated by single spaces; only read when P is None.
                  If the file has exactly 4 lines, the first one is skipped.
        P: optional projection matrix; when given, `filename` is ignored
    Outputs:
        intrinsics: (4, 4) array whose top-left 3x3 block is K normalised so K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Read through a context manager so the file handle is always released.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # decomposeProjectionMatrix returns the world-to-camera rotation; its transpose
    # (= inverse for a rotation) is the camera-to-world rotation we want here.
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4-vector; de-homogenise it to get the translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=(128, 128, 128), batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=None):
    """
    Load the object list and the shared camera metadata.

    Inputs:
        root_dir: folder containing `<folder_id>/<uid>/view_*.png`
        split: 'train' selects the train list, anything else the val list
        n_views, batch_size: stored only
        img_wh: only validated (multiples of 32); the working size is fixed to (256, 256)
        downSample: resize factor used by read_mask()
        split_filepath, pair_filepath: accepted for interface compatibility, unused
        N_rays: stored as self.N_rays
        vol_dims: cost-volume resolution, stored as a float tensor
        clean_image, importance_sample: stored flags
        test_ref_views: stored only; defaults to a fresh empty list
    """
    # print("root_dir: ", root_dir)
    self.root_dir = root_dir
    self.split = split

    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    self.downSample = downSample  # read_mask() relies on this attribute

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    # A fresh list per instance; a shared `[]` default would leak between datasets.
    self.test_ref_views = [] if test_ref_views is None else test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)

    self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
    self.img_wh = (256, 256)  # fixed regardless of the img_wh argument
    self.input_poses = np.array(list(meta["c2ws"].values()))
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])
    self.near_far[1] = 1.8  # far bound from the json is overridden
    self.define_transforms()
    # Flips the y and z axes of the camera frame (Blender -> OpenCV convention).
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    # self.root_dir = root_dir
    for idx, img_id in enumerate(self.img_ids):
        pose = self.input_poses[idx]
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)
    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
139 |
+
|
140 |
+
|
141 |
+
def define_transforms(self):
    """Install the PIL -> tensor conversion applied to every loaded image."""
    pipeline = [T.ToTensor()]
    self.transform = T.Compose(pipeline)
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
def load_cam_info(self):
|
147 |
+
for vid, img_id in enumerate(self.img_ids):
|
148 |
+
intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
|
149 |
+
self.all_intrinsics.append(intrinsic)
|
150 |
+
self.all_extrinsics.append(extrinsic)
|
151 |
+
self.all_near_fars.append(near_far)
|
152 |
+
|
153 |
+
def read_depth(self, filename):
    # Placeholder with no behaviour. NOTE(review): a later method of the same name
    # in this class takes (filename, near_bound, noisy_factor) and replaces this one
    # at class-creation time, so this definition is never the one that gets called.
    pass
|
155 |
+
|
156 |
+
def read_mask(self, filename):
    """
    Load a mask image and binarise it at two resolutions.

    Inputs:
        filename: path of the mask image, read as single-channel
    Outputs:
        mask: mask_h downscaled by 0.25, values in {0, 1}
        mask_h: mask resized by the dataset's downSample factor, values in {0, 1}
    Raises:
        OSError: when cv2 cannot read the file
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f'cv2.imread could not read mask: {filename}')
    # Fall back to 1.0 (the constructor's default) when the attribute was never stored.
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
167 |
+
|
168 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """
    Estimate the matrix mapping normalised coordinates to the cameras' bounding sphere.

    Inputs:
        img_hw, intrinsics, extrinsics, near_fars: forwarded unchanged to get_boundingbox
        factor: multiplier applied to the estimated radius
    Outputs:
        scale_mat: (4, 4) float32, diag(radius * factor) with the sphere centre as translation
        scale_factor: 1 / (radius * factor)
    """
    center, radius, _bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    scaled_radius = radius * factor

    normalization = np.diag([scaled_radius] * 3 + [1.0])
    normalization[:3, 3] = center.cpu().numpy()

    return normalization.astype(np.float32), 1. / scaled_radius.cpu().numpy()
|
181 |
+
|
182 |
+
def __len__(self):
|
183 |
+
return 8*len(self.lvis_paths)
|
184 |
+
|
185 |
+
|
186 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    # Placeholder: accepts the arguments and returns None without reading anything.
    pass
|
188 |
+
|
189 |
+
|
190 |
+
def __getitem__(self, idx):
|
191 |
+
sample = {}
|
192 |
+
origin_idx = idx
|
193 |
+
imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
|
194 |
+
intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
|
195 |
+
|
196 |
+
|
197 |
+
folder_uid_dict = self.lvis_paths[idx//8]
|
198 |
+
idx = idx % 8 # [0, 7]
|
199 |
+
folder_id = folder_uid_dict['folder_id']
|
200 |
+
uid = folder_uid_dict['uid']
|
201 |
+
|
202 |
+
|
203 |
+
# target view
|
204 |
+
c2w = self.c2ws[idx]
|
205 |
+
w2c = np.linalg.inv(c2w)
|
206 |
+
w2c_ref = w2c
|
207 |
+
w2c_ref_inv = np.linalg.inv(w2c_ref)
|
208 |
+
|
209 |
+
w2cs.append(w2c @ w2c_ref_inv)
|
210 |
+
c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
|
211 |
+
|
212 |
+
img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
|
213 |
+
|
214 |
+
depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))
|
215 |
+
|
216 |
+
|
217 |
+
img = Image.open(img_filename)
|
218 |
+
|
219 |
+
img = self.transform(img) # (4, h, w)
|
220 |
+
|
221 |
+
|
222 |
+
if img.shape[0] == 4:
|
223 |
+
img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
|
224 |
+
imgs += [img]
|
225 |
+
|
226 |
+
depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
|
227 |
+
mask_h = depth_h > 0
|
228 |
+
# print("valid pixels", np.sum(mask_h))
|
229 |
+
directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
|
230 |
+
surface_points = directions * depth_h[..., None] # [H, W, 3]
|
231 |
+
distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
|
232 |
+
depth_h = distance
|
233 |
+
|
234 |
+
|
235 |
+
depths_h.append(depth_h)
|
236 |
+
masks_h.append(mask_h)
|
237 |
+
|
238 |
+
intrinsic = self.intrinsic
|
239 |
+
intrinsics.append(intrinsic)
|
240 |
+
|
241 |
+
|
242 |
+
near_fars.append(self.near_fars[idx])
|
243 |
+
image_perm = 0 # only supervised on reference view
|
244 |
+
|
245 |
+
mask_dilated = None
|
246 |
+
|
247 |
+
# src_views = range(8+idx*4, 8+(idx+1)*4)
|
248 |
+
src_views = range(8, 8 + 8 * 4)
|
249 |
+
|
250 |
+
for vid in src_views:
|
251 |
+
img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')
|
252 |
+
|
253 |
+
img = Image.open(img_filename)
|
254 |
+
img_wh = self.img_wh
|
255 |
+
|
256 |
+
img = self.transform(img)
|
257 |
+
if img.shape[0] == 4:
|
258 |
+
img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
|
259 |
+
|
260 |
+
imgs += [img]
|
261 |
+
depth_h = np.ones(img.shape[1:], dtype=np.float32)
|
262 |
+
depths_h.append(depth_h)
|
263 |
+
masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
|
264 |
+
|
265 |
+
near_fars.append(self.all_near_fars[vid])
|
266 |
+
intrinsics.append(self.all_intrinsics[vid])
|
267 |
+
|
268 |
+
w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
|
269 |
+
|
270 |
+
|
271 |
+
# ! estimate scale_mat
|
272 |
+
scale_mat, scale_factor = self.cal_scale_mat(
|
273 |
+
img_hw=[img_wh[1], img_wh[0]],
|
274 |
+
intrinsics=intrinsics, extrinsics=w2cs,
|
275 |
+
near_fars=near_fars, factor=1.1
|
276 |
+
)
|
277 |
+
|
278 |
+
|
279 |
+
new_near_fars = []
|
280 |
+
new_w2cs = []
|
281 |
+
new_c2ws = []
|
282 |
+
new_affine_mats = []
|
283 |
+
new_depths_h = []
|
284 |
+
for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
|
285 |
+
|
286 |
+
P = intrinsic @ extrinsic @ scale_mat
|
287 |
+
P = P[:3, :4]
|
288 |
+
# - should use load_K_Rt_from_P() to obtain c2w
|
289 |
+
c2w = load_K_Rt_from_P(None, P)[1]
|
290 |
+
w2c = np.linalg.inv(c2w)
|
291 |
+
new_w2cs.append(w2c)
|
292 |
+
new_c2ws.append(c2w)
|
293 |
+
affine_mat = np.eye(4)
|
294 |
+
affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
|
295 |
+
new_affine_mats.append(affine_mat)
|
296 |
+
|
297 |
+
camera_o = c2w[:3, 3]
|
298 |
+
dist = np.sqrt(np.sum(camera_o ** 2))
|
299 |
+
near = dist - 1
|
300 |
+
far = dist + 1
|
301 |
+
|
302 |
+
new_near_fars.append([0.95 * near, 1.05 * far])
|
303 |
+
new_depths_h.append(depth * scale_factor)
|
304 |
+
|
305 |
+
# print(new_near_fars)
|
306 |
+
imgs = torch.stack(imgs).float()
|
307 |
+
depths_h = np.stack(new_depths_h)
|
308 |
+
masks_h = np.stack(masks_h)
|
309 |
+
|
310 |
+
affine_mats = np.stack(new_affine_mats)
|
311 |
+
intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
|
312 |
+
new_near_fars)
|
313 |
+
|
314 |
+
if self.split == 'train':
|
315 |
+
start_idx = 0
|
316 |
+
else:
|
317 |
+
start_idx = 1
|
318 |
+
|
319 |
+
view_ids = [idx] + list(src_views)
|
320 |
+
sample['origin_idx'] = origin_idx
|
321 |
+
sample['images'] = imgs # (V, 3, H, W)
|
322 |
+
sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
|
323 |
+
sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
|
324 |
+
sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
|
325 |
+
sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
|
326 |
+
sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
|
327 |
+
sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
|
328 |
+
sample['view_ids'] = torch.from_numpy(np.array(view_ids))
|
329 |
+
sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
|
330 |
+
|
331 |
+
# sample['light_idx'] = torch.tensor(light_idx)
|
332 |
+
sample['scan'] = folder_id
|
333 |
+
|
334 |
+
sample['scale_factor'] = torch.tensor(scale_factor)
|
335 |
+
sample['img_wh'] = torch.from_numpy(np.array(img_wh))
|
336 |
+
sample['render_img_idx'] = torch.tensor(image_perm)
|
337 |
+
sample['partial_vol_origin'] = self.partial_vol_origin
|
338 |
+
sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
|
339 |
+
|
340 |
+
|
341 |
+
# - image to render
|
342 |
+
sample['query_image'] = sample['images'][0]
|
343 |
+
sample['query_c2w'] = sample['c2ws'][0]
|
344 |
+
sample['query_w2c'] = sample['w2cs'][0]
|
345 |
+
sample['query_intrinsic'] = sample['intrinsics'][0]
|
346 |
+
sample['query_depth'] = sample['depths_h'][0]
|
347 |
+
sample['query_mask'] = sample['masks_h'][0]
|
348 |
+
sample['query_near_far'] = sample['near_fars'][0]
|
349 |
+
|
350 |
+
sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
|
351 |
+
sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
|
352 |
+
sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
|
353 |
+
sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
|
354 |
+
sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
|
355 |
+
sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
|
356 |
+
sample['view_ids'] = sample['view_ids'][start_idx:]
|
357 |
+
sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
|
358 |
+
|
359 |
+
sample['scale_mat'] = torch.from_numpy(scale_mat)
|
360 |
+
sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
|
361 |
+
|
362 |
+
# - generate rays
|
363 |
+
if ('val' in self.split) or ('test' in self.split):
|
364 |
+
sample_rays = gen_rays_from_single_image(
|
365 |
+
img_wh[1], img_wh[0],
|
366 |
+
sample['query_image'],
|
367 |
+
sample['query_intrinsic'],
|
368 |
+
sample['query_c2w'],
|
369 |
+
depth=sample['query_depth'],
|
370 |
+
mask=sample['query_mask'] if self.clean_image else None)
|
371 |
+
else:
|
372 |
+
sample_rays = gen_random_rays_from_single_image(
|
373 |
+
img_wh[1], img_wh[0],
|
374 |
+
self.N_rays,
|
375 |
+
sample['query_image'],
|
376 |
+
sample['query_intrinsic'],
|
377 |
+
sample['query_c2w'],
|
378 |
+
depth=sample['query_depth'],
|
379 |
+
mask=sample['query_mask'] if self.clean_image else None,
|
380 |
+
dilated_mask=mask_dilated,
|
381 |
+
importance_sample=self.importance_sample)
|
382 |
+
|
383 |
+
|
384 |
+
sample['rays'] = sample_rays
|
385 |
+
|
386 |
+
return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage.py
ADDED
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width in pixels
        focal: (fx, fy) focal lengths in pixels; a single scalar is also
               accepted and is then used for both axes
        center: optional (cx, cy) principal point in pixels; defaults to the
                image centre (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
                    (z component is 1, directions are NOT normalised)
    """
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        # a scalar focal length applies to both image axes
        fx = fy = focal

    # Pixel coordinates 0..W-1 / 0..H-1 shifted by +0.5, i.e. rays go through
    # the pixel centres (same values as kornia's un-normalised meshgrid + 0.5).
    xs = torch.arange(W, dtype=torch.float32) + 0.5
    ys = torch.arange(H, dtype=torch.float32) + 0.5
    i = xs.unsqueeze(0).expand(H, W)  # x / column coordinate, (H, W)
    j = ys.unsqueeze(1).expand(H, W)  # y / row coordinate, (H, W)

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: text file holding the matrix as space-separated rows; only
                  read when P is None. A 4-line file has its first line dropped.
        P: optional 3x4 projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) matrix whose top-left 3x3 is K normalised so that
                    K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # context manager so the file handle is closed deterministically
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is transposed (i.e. inverted) to go into the cam2world pose; this
    # presumes OpenCV returns the world-to-camera rotation - see OpenCV docs
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4x1 vector; de-homogenise it to get the translation column
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-view dataset: each item pairs one reference view of an object with a
    fixed set of 32 source views, with all cameras expressed relative to the
    reference camera.

    Every object listed in the LVIS split file contributes 8 items (reference
    views 0..7), so ``len(dataset) == 8 * number_of_objects``.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """
        Inputs:
            root_dir: folder that holds the source-view images, laid out as
                      root_dir/folder_id/uid/view_0_*_*.png
            split: 'train' selects the train objects, anything else the val ones
            n_views, batch_size, test_ref_views: stored, not used by this class
            img_wh: only validated (multiples of 32); the working resolution
                    is fixed to (256, 256)
            downSample: rescaling factor applied by read_mask()
            split_filepath, pair_filepath: accepted for interface parity, unused
            N_rays: number of random rays generated per training sample
            vol_dims: voxel grid dimensions, stored as self.voxel_dims
            clean_image: when True the query mask is passed to the ray generator
            importance_sample: forwarded to the random ray generator
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() reads this attribute; it was previously never stored
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # far bound from the pose file is overridden
        self.define_transforms()
        # flips the y and z axes (Blender -> OpenCV camera convention, per the name)
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for idx, img_id in enumerate(self.img_ids):
            pose = self.input_poses[idx]
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Build the image preprocessing pipeline (PIL image -> tensor)."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far for every pose id."""
        for vid, img_id in enumerate(self.img_ids):
            intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
            self.all_intrinsics.append(intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(near_far)

    def read_mask(self, filename):
        """
        Load a mask image and binarise it at two resolutions.

        Outputs:
            mask: the mask rescaled by self.downSample and then by 0.25
            mask_h: the mask rescaled by self.downSample only
        Raises FileNotFoundError when the image cannot be read.
        """
        mask_h = cv2.imread(filename, 0)
        if mask_h is None:
            raise FileNotFoundError(f'cannot read mask image: {filename}')
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix that maps the unit sphere onto the scene bounding sphere.

        The bounding sphere comes from get_boundingbox(); its radius is
        enlarged by `factor`.
        Outputs:
            scale_mat: (4, 4) float32, diag(radius) with the centre as translation
            scale_factor: 1 / radius
        """
        # center and radius are presumably torch tensors (.cpu() is called on them)
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Eight reference views per object."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder; depth maps are read inline in __getitem__."""
        pass

    def _load_rgb(self, path):
        """Load an image as a (3, H, W) tensor, compositing RGBA onto white."""
        # context manager closes the file handle once the pixels are copied
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    @staticmethod
    def _rescale_camera(intrinsic, extrinsic, scale_mat):
        """Return (w2c, c2w) of a camera after applying scale_mat to the world."""
        P = (intrinsic @ extrinsic @ scale_mat)[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        return np.linalg.inv(c2w), c2w

    def __getitem__(self, idx):
        """
        Assemble one sample: reference view idx % 8 of object idx // 8, plus
        the 32 source views (pose indices 8..39).

        Returns a dict with images, depths, masks, cameras (normalised by the
        estimated scale_mat), the 8 candidate target poses and the rays of the
        query (reference) view.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']

        # target view; every camera below is expressed relative to it
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)

        narrow_dir = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid)
        img_filename = os.path.join(narrow_dir, f'view_{idx}.png')
        depth_filename = os.path.join(narrow_dir, f'view_{idx}_depth_mm.png')

        imgs += [self._load_rgb(img_filename)]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            raise FileNotFoundError(f'cannot read depth map: {depth_filename}')
        # file name says millimetres; divide by 1000
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0
        # scale each per-pixel ray direction by the depth and take its length
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        surface_points = directions * depth_h[..., None]  # [H, W, 3]
        distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
        depth_h = distance

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # src_views = range(8+idx*4, 8+(idx+1)*4)
        src_views = range(8, 8 + 8 * 4)
        img_wh = self.img_wh

        for vid in src_views:
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png')
            img = self._load_rgb(img_filename)

            imgs += [img]
            # source views carry no ground-truth depth: dummy all-ones depth and mask
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            w2c, c2w = self._rescale_camera(intrinsic, extrinsic, scale_mat)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far bracket the origin at camera distance -/+ 1, with a 5% margin
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # outside training the reference view is dropped from the input views
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        # poses of the 8 candidate target views, in the same normalised frame
        new_target_w2cs = []
        for i_idx in range(8):
            target_w2c = self.all_extrinsics[i_idx] @ w2c_ref_inv
            new_target_w2cs.append(
                self._rescale_camera(self.all_intrinsics[i_idx], target_w2c, scale_mat)[0])
        target_w2cs = np.stack(new_target_w2cs)

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # note: near_fars is intentionally left unsliced, as in the original
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_2_stage_temp.py
ADDED
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width in pixels
        focal: (fx, fy) focal lengths in pixels; a single scalar is also
               accepted and is then used for both axes
        center: optional (cx, cy) principal point in pixels; defaults to the
                image centre (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
                    (z component is 1, directions are NOT normalised)
    """
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        # a scalar focal length applies to both image axes
        fx = fy = focal

    # Pixel coordinates 0..W-1 / 0..H-1 shifted by +0.5, i.e. rays go through
    # the pixel centres (same values as kornia's un-normalised meshgrid + 0.5).
    xs = torch.arange(W, dtype=torch.float32) + 0.5
    ys = torch.arange(H, dtype=torch.float32) + 0.5
    i = xs.unsqueeze(0).expand(H, W)  # x / column coordinate, (H, W)
    j = ys.unsqueeze(1).expand(H, W)  # y / row coordinate, (H, W)

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: text file holding the matrix as space-separated rows; only
                  read when P is None. A 4-line file has its first line dropped.
        P: optional 3x4 projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) matrix whose top-left 3x3 is K normalised so that
                    K[2, 2] == 1
        pose: (4, 4) float32 camera-to-world matrix
    """
    if P is None:
        # context manager so the file handle is closed deterministically
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is transposed (i.e. inverted) to go into the cam2world pose; this
    # presumes OpenCV returns the world-to-camera rotation - see OpenCV docs
    pose[:3, :3] = R.transpose()
    # t is a homogeneous 4x1 vector; de-homogenise it to get the translation column
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=[128, 128, 128], batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=[]):
    """
    Load the object list and the shared camera poses.

    Inputs:
        root_dir: dataset root folder, stored as self.root_dir
        split: 'train' selects the train objects, anything else the val ones
        n_views, batch_size, test_ref_views: stored on the instance
        img_wh: only validated (multiples of 32); the working resolution
                is fixed to (256, 256)
        downSample: rescaling factor applied by read_mask()
        split_filepath, pair_filepath: accepted but not used here
        N_rays: stored as self.N_rays
        vol_dims: voxel grid dimensions, stored as self.voxel_dims
        clean_image, importance_sample: stored on the instance
    """
    self.root_dir = root_dir
    self.split = split

    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # read_mask() reads this attribute; it was previously never stored
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)

    self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(meta["c2ws"].values()))
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])
    self.near_far[1] = 1.8  # far bound from the pose file is overridden
    self.define_transforms()
    # flips the y and z axes (Blender -> OpenCV camera convention, per the name)
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    for idx, img_id in enumerate(self.img_ids):
        pose = self.input_poses[idx]
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)
    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
139 |
+
|
140 |
+
|
141 |
+
def define_transforms(self):
    """Build the image preprocessing pipeline: a single to-tensor conversion."""
    steps = [T.ToTensor()]
    self.transform = T.Compose(steps)
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
def load_cam_info(self):
    """
    Fill the per-view camera lists, one entry per pose id.

    The shared intrinsic and near/far are appended as-is; the extrinsic is the
    inverse of the stored camera-to-world pose.
    """
    for view_index, _ in enumerate(self.img_ids):
        # invert first so nothing is appended if the pose is singular
        world_to_cam = np.linalg.inv(self.c2ws[view_index])
        self.all_intrinsics.append(self.intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(self.near_far)
|
152 |
+
|
153 |
+
def read_depth(self, filename):
    # Unimplemented stub. NOTE(review): a second read_depth with a different
    # signature is defined later in this class and replaces this one.
    pass
|
155 |
+
|
156 |
+
def read_mask(self, filename):
    """
    Load a mask image and binarise it at two resolutions.

    Inputs:
        filename: path of the mask image (read as single-channel)
    Outputs:
        mask: the mask rescaled by downSample and then by 0.25
        mask_h: the mask rescaled by downSample only
    Raises FileNotFoundError when the image cannot be read.
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        raise FileNotFoundError(f'cannot read mask image: {filename}')
    # __init__ accepts downSample but does not store it; fall back to its
    # default of 1.0 instead of failing with AttributeError
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
167 |
+
|
168 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """
    Build the matrix that maps the unit sphere onto the scene bounding sphere.

    The sphere is obtained from get_boundingbox(); its radius is enlarged by
    `factor`.
    Outputs:
        scale_mat: (4, 4) float32, diag(radius) with the centre as translation
        scale_factor: 1 / radius
    """
    # centre and radius are presumably torch tensors (.cpu() is called on them)
    bbox_center, bbox_radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    bbox_radius = bbox_radius * factor

    scale_mat = np.diag([bbox_radius, bbox_radius, bbox_radius, 1.0])
    scale_mat[:3, 3] = bbox_center.cpu().numpy()
    scale_mat = scale_mat.astype(np.float32)

    inv_radius = 1. / bbox_radius.cpu().numpy()
    return scale_mat, inv_radius
|
181 |
+
|
182 |
+
def __len__(self):
|
183 |
+
return 10
|
184 |
+
|
185 |
+
|
186 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    # Unimplemented stub: does nothing and returns None.
    pass
|
188 |
+
|
189 |
+
|
190 |
+
def __getitem__(self, idx):
    """Assemble one sample: a reference view plus 32 source views of a single object.

    All camera poses are re-expressed relative to the reference camera and then
    normalised with the scale matrix from ``cal_scale_mat``. The returned dict
    holds per-view tensors (``images``, ``depths_h``, ``masks_h``, ``w2cs``,
    ``c2ws``, ``intrinsics``, ``near_fars``, ``affine_mats``, ``view_ids``),
    the ``query_*`` entries copied from view 0, ``target_candidate_w2cs``,
    ``scale_mat``, ``trans_mat``, bookkeeping fields and the generated ``rays``.
    """
    idx = idx * 8
    sample = {}
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

    # idx was multiplied by 8 above, so idx // 8 recovers the incoming index ...
    folder_uid_dict = self.lvis_paths[idx//8]
    # ... and, for an integer idx, idx % 8 is always 0: view 0 is always the reference.
    idx = idx % 8  # [0, 7]
    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']

    # target view
    c2w = self.c2ws[idx]
    w2c = np.linalg.inv(c2w)
    w2c_ref = w2c
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    # For the reference view this product is w2c @ inv(w2c), i.e. (numerically) identity.
    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    # NOTE(review): the reference image and depth are read from a hard-coded absolute
    # directory rather than self.root_dir (which the source views below use).
    img_filename = os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}.png')

    depth_filename = os.path.join(os.path.join("/objaverse-processed/zero12345_img/zero12345_narrow/", folder_id, uid, f'view_{idx}_depth_mm.png'))

    img = Image.open(img_filename)

    img = self.transform(img)  # (4, h, w)

    if img.shape[0] == 4:
        # Composite RGBA over a white background using the alpha channel.
        img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
    imgs += [img]

    # Stored values are divided by 1000; the "_depth_mm" suffix suggests millimetres — TODO confirm.
    depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
    mask_h = depth_h > 0
    directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
    # Scale each ray by the depth and take its length; presumably this converts z-depth
    # into distance along the ray — confirm against get_ray_directions.
    surface_points = directions * depth_h[..., None]  # [H, W, 3]
    distance = np.linalg.norm(surface_points, axis=-1)  # [H, W]
    depth_h = distance

    depths_h.append(depth_h)
    masks_h.append(mask_h)

    intrinsic = self.intrinsic
    intrinsics.append(intrinsic)

    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None

    # src_views = range(8+idx*4, 8+(idx+1)*4)
    # Source views are the 32 global view ids 8..39.
    src_views = range(8, 8 + 8 * 4)

    for vid in src_views:
        # File name is view_0_<(vid - 8) // 4>_<vid % 4 + 1>.png, i.e. first index 0..7, second 1..4.
        img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_0_{(vid - 8) // 4}_{vid % 4 + 1}.png')

        img = Image.open(img_filename)
        img_wh = self.img_wh

        img = self.transform(img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

        imgs += [img]
        # Source views get constant placeholder depth (all ones) and an all-ones mask.
        depth_h = np.ones(img.shape[1:], dtype=np.float32)
        depths_h.append(depth_h)
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])

        # Express the source camera relative to the reference camera frame.
        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    # ! estimate scale_mat
    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

        # Fold the scale matrix into the projection, then recover the camera pose from it.
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # Near/far are recomputed from the camera's distance to the origin (+/- 1, with a
        # 5% margin); the incoming near_far loop variable is not used.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1

        new_near_fars.append([0.95 * near, 1.05 * far])
        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
        new_near_fars)

    # Outside training the reference view (index 0) is dropped from the per-view tensors below.
    if self.split == 'train':
        start_idx = 0
    else:
        start_idx = 1

    # Poses of the first 8 views, relative to the reference camera and normalised like the others.
    target_w2cs = []
    target_intrinsics = []
    new_target_w2cs = []
    for i_idx in range(8):
        target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
        target_intrinsics.append(self.all_intrinsics[i_idx])

    for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):

        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_target_w2cs.append(w2c)
    target_w2cs = np.stack(new_target_w2cs)

    view_ids = [idx] + list(src_views)
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    # sample['light_idx'] = torch.tensor(light_idx)
    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

    # - image to render
    # The query_* entries are taken before the start_idx slicing, so they always describe view 0.
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
    sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
    sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
    sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
    sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
    sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
    sample['view_ids'] = sample['view_ids'][start_idx:]
    sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    # 'val'/'test' splits use gen_rays_from_single_image; any other split draws
    # self.N_rays random rays instead.
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data.py
ADDED
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset and focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate.
                    The last component is always 1; the vectors are not normalised.
    """
    # [0] drops the leading batch axis of the meshgrid; + 0.5 moves every sample
    # to the centre of its pixel.
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5  # HxWx2

    i, j = grid.unbind(-1)
    # NOTE: the +0.5 pixel-centre offset IS applied above, unlike the variant
    # discussed in https://github.com/bmild/nerf/issues/24
    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack([(i - cent[0]) / focal[0], (j - cent[1]) / focal[1], torch.ones_like(i)], -1)  # (H, W, 3)

    return directions
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Args:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file has
            exactly four lines, the first one is skipped.
        P: optional 3x4 projection matrix. When given, ``filename`` is ignored.

    Returns:
        (intrinsics, pose): ``intrinsics`` is a 4x4 array whose top-left 3x3 block
        is K normalised so that ``K[2, 2] == 1``; ``pose`` is a float32 4x4
        camera-to-world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # Per the OpenCV docs, R is the world-to-camera rotation, so its transpose is the
    # camera-to-world rotation; t is a homogeneous 4-vector, de-homogenised below.
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
|
65 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=(128, 128, 128), batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=None,
             specific_dataset_name='GSO'
             ):
    """Configure the evaluation dataset; no files are read here.

    Args:
        root_dir: directory that contains the ``specific_dataset_name`` folder.
        split: dataset split name; ``'val'`` and ``'export_mesh'`` are the
            intended values (others only trigger a warning).
        n_views: stored as ``self.n_views``.
        img_wh: (width, height); when not None both must be multiples of 32.
        downSample: resize factor read by ``read_mask``.
        split_filepath, pair_filepath: accepted but not used by this constructor.
        N_rays: number of random rays drawn per sample outside val/test splits.
        vol_dims: voxel grid dimensions, stored as a float32 tensor.
        batch_size: stored as ``self.batch_size``.
        clean_image: when true, the query mask is passed to the ray generators.
        importance_sample: forwarded to the random ray generator.
        test_ref_views: optional list of reference views; defaults to a new empty list.
        specific_dataset_name: sub-folder of ``root_dir`` holding the data.
    """
    import warnings

    self.root_dir = root_dir
    self.split = split

    self.specific_dataset_name = specific_dataset_name
    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # read_mask() reads self.downSample, so it has to be stored on the instance.
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    # A None default avoids sharing one mutable list between all instances.
    self.test_ref_views = [] if test_ref_views is None else test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    # The previous ``assert self.split == 'val' or 'export_mesh'`` was always true
    # (a non-empty string is truthy). Warn rather than raise so that callers that
    # already pass other split names keep working.
    if self.split not in ('val', 'export_mesh'):
        warnings.warn(
            f"only support val or export_mesh, got split={self.split!r}",
            stacklevel=2,
        )

    # find all subfolders
    main_folder = os.path.join(root_dir, self.specific_dataset_name)
    self.shape_list = [""]  # os.listdir(main_folder) # MODIFIED
    self.shape_list.sort()

    self.lvis_paths = [os.path.join(main_folder, shape_name) for shape_name in self.shape_list]

    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
119 |
+
|
120 |
+
|
121 |
+
def define_transforms(self):
    """Create the image preprocessing pipeline and store it on ``self.transform``.

    The pipeline consists of a single ``T.ToTensor()`` step wrapped in ``T.Compose``.
    """
    to_tensor = T.ToTensor()
    self.transform = T.Compose([to_tensor])
|
123 |
+
|
124 |
+
|
125 |
+
|
126 |
+
def load_cam_info(self):
    """Append one camera record per entry of ``self.img_ids`` to the ``all_*`` lists.

    For view ``k`` the shared ``self.intrinsic`` and ``self.near_far`` are appended
    unchanged, and the extrinsic is the matrix inverse of ``self.c2ws[k]``.
    The three target lists must already exist on the instance.
    """
    for view_idx, _ in enumerate(self.img_ids):
        # Invert first so a failing inversion leaves all three lists untouched for this view.
        world_to_cam = np.linalg.inv(self.c2ws[view_idx])
        self.all_intrinsics.append(self.intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(self.near_far)
|
132 |
+
|
133 |
+
def read_depth(self, filename):
    """Placeholder depth reader: does nothing with ``filename`` and returns ``None``."""
    # NOTE(review): the class defines ``read_depth`` a second time further down with the
    # signature (self, filename, near_bound, noisy_factor=1.0); that later definition
    # replaces this one in the class namespace.
    pass
|
135 |
+
|
136 |
+
def read_mask(self, filename):
    """Load a mask image and return it binarised at two resolutions.

    The file is read with OpenCV flag ``0`` (single-channel), rescaled by
    ``self.downSample`` with nearest-neighbour sampling, and then shrunk again
    by a further factor of 0.25.

    Returns:
        (mask, mask_h): the quarter-size mask and the ``downSample``-size mask.
        In both, every value greater than 0 has been set to 1.
    """
    nearest = {"interpolation": cv2.INTER_NEAREST}

    full_res = cv2.imread(filename, 0)
    full_res = cv2.resize(full_res, None, fx=self.downSample, fy=self.downSample, **nearest)
    quarter_res = cv2.resize(full_res, None, fx=0.25, fy=0.25, **nearest)

    # The masks stored as png are not binary, so clamp every non-zero value to 1 in place.
    for arr in (quarter_res, full_res):
        arr[arr > 0] = 1

    return quarter_res, full_res
|
147 |
+
|
148 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """Build the 4x4 matrix that scales/translates the unit volume onto the scene bounds.

    ``get_boundingbox`` supplies a centre and radius for the given cameras; the
    radius is enlarged by ``factor``. ``center`` and ``radius`` are used with
    ``.cpu()``, so they are presumably torch tensors — confirm against
    ``get_boundingbox``.

    Returns:
        (scale_mat, scale_factor): ``scale_mat`` is a float32 4x4 array with the
        scaled radius on the first three diagonal entries and the centre in the
        last column; ``scale_factor`` is ``1 / scaled_radius``.
    """
    center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    scaled_radius = radius * factor

    scale_mat = np.diag([scaled_radius] * 3 + [1.0])
    scale_mat[:3, 3] = center.cpu().numpy()

    return scale_mat.astype(np.float32), 1. / scaled_radius.cpu().numpy()
|
158 |
+
|
159 |
+
def __len__(self):
    """Return the number of entries in ``self.lvis_paths`` (one sample per shape folder)."""
    # return 8*len(self.lvis_paths)
    return len(self.lvis_paths)
|
162 |
+
|
163 |
+
|
164 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """Placeholder depth reader: ignores every argument and returns ``None``."""
    pass
|
166 |
+
|
167 |
+
|
168 |
+
def __getitem__(self, idx):
    """Assemble one sample: a reference view plus 32 source views of a single shape folder.

    Camera metadata is re-read from ``<folder>/pose.json`` on every call and
    cached on ``self``. All poses are re-expressed relative to the reference
    camera and then normalised with the scale matrix from ``cal_scale_mat``.
    The returned dict holds per-view tensors (``images``, ``depths_h``,
    ``masks_h``, ``w2cs``, ``c2ws``, ``intrinsics``, ``near_fars``,
    ``affine_mats``, ``view_ids``), the ``query_*`` entries copied from view 0,
    ``target_candidate_w2cs``, ``scale_mat``, ``trans_mat``, bookkeeping fields
    and the generated ``rays``.
    """
    sample = {}
    idx = idx * 8  # to be deleted
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj-mats between views

    # idx was multiplied by 8 above, so idx // 8 recovers the incoming index ...
    folder_path = self.lvis_paths[idx//8]
    # ... and, for an integer idx, idx % 8 is always 0: the first pose entry is always the reference.
    idx = idx % 8  # [0, 7]

    # last subdir name
    shape_name = os.path.split(folder_path)[-1]

    pose_json_path = os.path.join(folder_path, "pose.json")
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)

    # pose.json is read for the keys "c2ws" (name -> pose), "intrinsics" and "near_far".
    self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(meta["c2ws"].values()))
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])
    # The far bound from pose.json is overridden with a fixed 1.8.
    self.near_far[1] = 1.8
    self.define_transforms()
    # Right-multiplying by this matrix negates the camera's y and z axes.
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    # self.root_dir = root_dir
    for image_dix, img_id in enumerate(self.img_ids):
        pose = self.input_poses[image_dix]
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)
    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # target view
    c2w = self.c2ws[idx]
    w2c = np.linalg.inv(c2w)
    w2c_ref = w2c
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    # For the reference view this product is w2c @ inv(w2c), i.e. (numerically) identity.
    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
    # The pose.json key is used verbatim as the image file name.
    img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')

    img = Image.open(img_filename)
    img = self.transform(img)  # (4, h, w)

    if img.shape[0] == 4:
        # Composite RGBA over a white background using the alpha channel.
        img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
    imgs += [img]

    # No depth is loaded for the reference view: depth is filled with -1 and the mask is all ones.
    depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
    depth_h = depth_h.fill_(-1.0)
    mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

    depths_h.append(depth_h)
    masks_h.append(mask_h)

    intrinsic = self.intrinsic
    intrinsics.append(intrinsic)

    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None

    # Source views are entries 8..39 of the pose.json ordering.
    src_views = range(8, 8 + 8 * 4)

    for vid in src_views:

        # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
        img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
        img = Image.open(img_filename)
        img_wh = self.img_wh

        img = self.transform(img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

        imgs += [img]
        # Source views get constant placeholder depth (all ones) and an all-ones mask.
        depth_h = np.ones(img.shape[1:], dtype=np.float32)
        depths_h.append(depth_h)
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])

        # Express the source camera relative to the reference camera frame.
        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    # ! estimate scale_mat
    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

        # Fold the scale matrix into the projection, then recover the camera pose from it.
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # Near/far are recomputed from the camera's distance to the origin (+/- 1, with a
        # 5% margin); the incoming near_far loop variable is not used.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1

        new_near_fars.append([0.95 * near, 1.05 * far])
        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
        new_near_fars)

    # Outside training the reference view (index 0) is dropped from the per-view tensors below.
    if self.split == 'train':
        start_idx = 0
    else:
        start_idx = 1

    # Poses of the first 8 views, relative to the reference camera and normalised like the others.
    target_w2cs = []
    target_intrinsics = []
    new_target_w2cs = []
    for i_idx in range(8):
        target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
        target_intrinsics.append(self.all_intrinsics[i_idx])

    for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):

        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_target_w2cs.append(w2c)
    target_w2cs = np.stack(new_target_w2cs)

    view_ids = [idx] + list(src_views)
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    # sample['light_idx'] = torch.tensor(light_idx)
    sample['scan'] = shape_name

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])

    # - image to render
    # The query_* entries are taken before the start_idx slicing, so they always describe view 0.
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
    sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
    sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
    sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
    sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
    sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
    sample['view_ids'] = sample['view_ids'][start_idx:]
    sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    # 'val'/'test' splits use gen_rays_from_single_image; any other split draws
    # self.N_rays random rays instead.
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data3_1.py
ADDED
@@ -0,0 +1,414 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute a per-pixel ray direction in camera coordinates.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset and focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3) tensor; the last component is always 1 and the
                    vectors are not normalised
    """
    # Drop the meshgrid's leading batch axis and sample at pixel centres (+0.5).
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    u, v = pixel_grid.unbind(-1)

    if center is None:
        center = [W / 2, H / 2]

    x_dir = (u - center[0]) / focal[0]
    y_dir = (v - center[1]) / focal[1]
    z_dir = torch.ones_like(u)

    return torch.stack([x_dir, y_dir, z_dir], -1)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.

    Args:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file has
            exactly four lines, the first one is skipped.
        P: optional 3x4 projection matrix. When given, ``filename`` is ignored.

    Returns:
        (intrinsics, pose): ``intrinsics`` is a 4x4 array whose top-left 3x3 block
        is K normalised so that ``K[2, 2] == 1``; ``pose`` is a float32 4x4
        camera-to-world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # Per the OpenCV docs, R is the world-to-camera rotation, so its transpose is the
    # camera-to-world rotation; t is a homogeneous 4-vector, de-homogenised below.
    pose[:3, :3] = R.transpose()
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """
    Per-shape evaluation dataset: one reference view plus 32 source views.

    Every sub-folder of ``<root_dir>/<specific_dataset_name>`` is one shape.
    ``__getitem__`` reads ``pose.json`` and the images under ``stage1_8`` /
    ``stage2_8`` of that folder and expresses all cameras relative to the
    reference view.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=(128, 128, 128), batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=None,
                 specific_dataset_name='Objaverse'):
        """
        Inputs:
            root_dir: directory containing the ``specific_dataset_name`` folder
            split: dataset split name; 'val' and 'export_mesh' are the supported values
            n_views, N_rays, batch_size, clean_image, importance_sample: stored as-is
            img_wh: only validated here (both entries must be multiples of 32)
            downSample: resize factor used by read_mask
            split_filepath, pair_filepath: accepted for interface compatibility, unused
            vol_dims: voxel grid dimensions, stored as a float tensor
            test_ref_views: stored as-is; defaults to a fresh empty list
            specific_dataset_name: sub-folder of root_dir that holds the shapes
        """
        # print("root_dir: ", root_dir)
        self.root_dir = root_dir
        self.split = split
        self.specific_dataset_name = specific_dataset_name
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() needs this factor; it used to be dropped on the floor.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Build a new list per instance instead of sharing a mutable default.
        self.test_ref_views = [] if test_ref_views is None else test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # The previous `assert self.split == 'val' or 'export_mesh'` was always
        # true. Warn instead of raising so that callers which relied on the
        # vacuous check keep working.
        if self.split not in ('val', 'export_mesh'):
            import warnings
            warnings.warn('only support val or export_mesh', stacklevel=2)

        # find all subfolders
        main_folder = os.path.join(root_dir, self.specific_dataset_name)
        self.shape_list = os.listdir(main_folder)
        self.shape_list.sort()

        self.lvis_paths = [os.path.join(main_folder, shape_name) for shape_name in self.shape_list]
        # print("lvis_paths: ", self.lvis_paths)

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Install the image transform (PIL image -> float tensor)."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far of every view."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """
        Read a grayscale mask and return it binarised at two resolutions.
        Outputs:
            mask: mask_h further resized by 0.25 in both directions
            mask_h: the file resized by self.downSample
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix that maps the unit sphere onto the scene bounding sphere.
        Outputs:
            scale_mat: (4, 4) float32, diag(radius * factor) with the centre as translation
            scale_factor: 1 / (radius * factor)
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """One sample per shape folder."""
        # return 8*len(self.lvis_paths)
        return len(self.lvis_paths)

    def read_depth(self, filename, near_bound=None, noisy_factor=1.0):
        """Placeholder: no depth is loaded by this dataset; always returns None.

        The class used to define read_depth twice (the second definition
        shadowing the first); this single signature accepts both call shapes.
        """
        return None

    def _load_rgb(self, img_path):
        """Load an image as a tensor; a 4-channel image is alpha-blended onto white."""
        with Image.open(img_path) as pil_img:
            img = self.transform(pil_img)  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    @staticmethod
    def _rescaled_c2w(intrinsic, extrinsic, scale_mat):
        """Return the cam2world pose of the projection K @ w2c @ scale_mat."""
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        return load_K_Rt_from_P(None, P)[1]

    def __getitem__(self, idx):
        """
        Assemble the sample of shape number ``idx``.

        Entry 0 of every per-view list is the reference (query) view; it is
        followed by the 32 source views. For any split other than 'train' the
        reference entry is stripped from the per-view tensors before returning.
        Camera data of the current shape is also stored on ``self`` as a side
        effect.
        """
        sample = {}
        # Leftover of a variant with 8 reference views per shape: after the
        # multiplication the reference index below is always 0.
        idx = idx * 8  # to be deleted
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_path = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]

        # last subdir name
        shape_name = os.path.split(folder_path)[-1]

        pose_json_path = os.path.join(folder_path, "pose.json")
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8
        self.define_transforms()
        # Negates the camera y and z axes (Blender -> OpenCV convention, per the name).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # target view; every extrinsic below is right-multiplied by w2c_ref_inv
        # so that poses are relative to this reference camera
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img = self._load_rgb(os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}'))
        imgs += [img]

        # No ground-truth depth/mask is available: depth is filled with -1, mask with 1.
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        img_wh = self.img_wh

        # src_views = range(8+idx*4, 8+(idx+1)*4)
        src_views = range(8, 8 + 8 * 4)

        for vid in src_views:
            if vid % 4 == 0:
                # Every 4th slot is replaced by the matching first-stage view.
                vid = (vid - 8) // 4
                img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}')
            else:
                img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')

            img = self._load_rgb(img_filename)

            imgs += [img]
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            c2w = self._rescaled_c2w(intrinsic, extrinsic, scale_mat)
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far: camera distance to the origin -/+ 1, with a 5% margin
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        # print(new_near_fars)
        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside of training the reference view is not part of the inputs.
        start_idx = 0 if self.split == 'train' else 1

        # Rescaled world-to-camera matrices of the first 8 views.
        new_target_w2cs = []
        for i_idx in range(8):
            c2w = self._rescaled_c2w(self.all_intrinsics[i_idx],
                                     self.all_extrinsics[i_idx] @ w2c_ref_inv,
                                     scale_mat)
            new_target_w2cs.append(np.linalg.inv(c2w))
        target_w2cs = np.stack(new_target_w2cs)

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        # sample['light_idx'] = torch.tensor(light_idx)
        sample['scan'] = shape_name

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
        # print("meta: ", sample['meta'])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # Drop the reference entry (start_idx == 1) from the per-view inputs.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_32_wide.py
ADDED
@@ -0,0 +1,465 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
def calc_pose(phis, thetas, size, radius = 1.2):
    """
    Build look-at style camera poses for cameras placed on a sphere.
    Inputs:
        phis: sequence of `size` angles; the camera height is radius * cos(phi)
        thetas: sequence of `size` angles rotating the camera about the z axis
        size: number of poses B
        radius: distance of every camera from the origin
        (angles are fed straight to torch.sin / torch.cos, i.e. radians)
    Outputs:
        poses: (B, 3, 4) tensor; the rotation columns are (right, up, forward)
               where forward is the unit vector from the origin to the camera,
               and the last column is the camera centre
    """
    def normalize(vectors):
        return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10)

    thetas = torch.FloatTensor(thetas)
    phis = torch.FloatTensor(phis)

    centers = torch.stack([
        radius * torch.sin(thetas) * torch.sin(phis),
        -radius * torch.cos(thetas) * torch.sin(phis),
        radius * torch.cos(phis),
    ], dim=-1)  # [B, 3]
    # Flatten to [B, 3]; unlike squeeze(0) this keeps a size-1 batch two-dimensional.
    centers = centers.reshape(-1, 3)

    # lookat
    forward_vector = normalize(centers)
    up_vector = torch.FloatTensor([0, 0, 1]).unsqueeze(0).repeat(size, 1)
    right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1))
    # A camera on the z axis has forward parallel to up, so the cross product
    # vanishes. Repair those rows individually; the old whole-batch sum only
    # fired when every row was degenerate.
    degenerate = right_vector.pow(2).sum(dim=-1, keepdim=True) < 0.01
    fallback_right = torch.FloatTensor([0, 1, 0]).unsqueeze(0).repeat(size, 1)
    right_vector = torch.where(degenerate, fallback_right, right_vector)
    up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1))

    poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1)
    poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
    poses[:, :3, 3] = centers
    return poses
|
44 |
+
|
45 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the un-normalised ray direction of every pixel, in camera coordinates.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] scales the first grid coordinate and
               focal[1] the second one
        center: optional indexable pair used as the principal point;
                defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # create_meshgrid returns a batched grid; keep the single batch entry and
    # add 0.5 to every coordinate (i.e. the pixel-centre offset IS applied).
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    # presumably the last axis is ordered (x, y) - confirm against kornia docs
    px, py = pixel_grid.unbind(-1)

    if center is None:
        principal = [W / 2, H / 2]
    else:
        principal = center

    ray_x = (px - principal[0]) / focal[0]
    ray_y = (py - principal[1]) / focal[1]
    ray_z = torch.ones_like(px)

    return torch.stack([ray_x, ray_y, ray_z], -1)  # (H, W, 3)
|
64 |
+
|
65 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a 3x4 projection matrix into intrinsics and a camera-to-world pose.
    Inputs:
        filename: text file holding the matrix as space-separated rows; only
                  read when P is None. A 4-line file has its first line skipped.
        P: optional 3x4 projection matrix; when given, filename is ignored
    Outputs:
        intrinsics: (4, 4) float64 matrix whose top-left 3x3 is K scaled so K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        # Keep the first four space-separated fields of every row.
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # Per the OpenCV docs, R is the world-to-camera rotation, so its transpose
    # is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is the homogeneous (4x1) camera position; de-homogenise it.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
87 |
+
|
88 |
+
|
89 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
90 |
+
class BlenderPerView(Dataset):
    """
    Per-shape evaluation dataset: one reference view plus 32 source views.

    Every sub-folder of ``root_dir`` is one shape. ``__getitem__`` reads the
    view angles from ``32_random/views.npz`` and the images
    ``32_random/<i>.png`` of that folder and expresses all cameras relative to
    the reference view. Intrinsics and near/far come from one shared pose JSON.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=(128, 128, 128), batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=None,
                 specific_dataset_name = 'GSO',
                 pose_json_path="/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
                 ):
        """
        Inputs:
            root_dir: directory whose sub-folders are the shapes
            split: dataset split name; 'val' and 'export_mesh' are the supported values
            n_views, N_rays, batch_size, clean_image, importance_sample: stored as-is
            img_wh: only validated here (both entries must be multiples of 32)
            downSample: resize factor used by read_mask
            split_filepath, pair_filepath: accepted for interface compatibility, unused
            vol_dims: voxel grid dimensions, stored as a float tensor
            test_ref_views: stored as-is; defaults to a fresh empty list
            specific_dataset_name: only used to label samples (sample['meta'])
            pose_json_path: JSON file providing the "intrinsics" (3x3) and
                            "near_far" entries shared by all shapes
        """
        # print("root_dir: ", root_dir)
        self.root_dir = root_dir
        self.split = split
        # self.specific_dataset_name = 'Realfusion'
        # self.specific_dataset_name = 'GSO'
        # self.specific_dataset_name = 'Objaverse'
        # self.specific_dataset_name = 'Zero123'

        self.specific_dataset_name = specific_dataset_name
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() needs this factor; it used to be dropped on the floor.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        # Build a new list per instance instead of sharing a mutable default.
        self.test_ref_views = [] if test_ref_views is None else test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # The previous `assert self.split == 'val' or 'export_mesh'` was always
        # true. Warn instead of raising so that callers which relied on the
        # vacuous check keep working.
        if self.split not in ('val', 'export_mesh'):
            import warnings
            warnings.warn('only support val or export_mesh', stacklevel=2)

        # find all subfolders
        main_folder = os.path.join(root_dir)
        self.shape_list = os.listdir(main_folder)
        self.shape_list.sort()

        # self.shape_list = ['barrel_render']
        self.lvis_paths = [os.path.join(main_folder, shape_name) for shape_name in self.shape_list]
        # print("lvis_paths: ", self.lvis_paths)

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        with open(pose_json_path, 'r') as f:
            meta = json.load(f)
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Install the image transform (PIL image -> float tensor)."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far of every view."""
        for vid in range(self.input_poses.shape[0]):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """
        Read a grayscale mask and return it binarised at two resolutions.
        Outputs:
            mask: mask_h further resized by 0.25 in both directions
            mask_h: the file resized by self.downSample
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """
        Build the matrix that maps the unit sphere onto the scene bounding sphere.
        Outputs:
            scale_mat: (4, 4) float32, diag(radius * factor) with the centre as translation
            scale_factor: 1 / (radius * factor)
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """One sample per shape folder."""
        # return 8*len(self.lvis_paths)
        return len(self.lvis_paths)

    def read_depth(self, filename, near_bound=None, noisy_factor=1.0):
        """Placeholder: no depth is loaded by this dataset; always returns None.

        The class used to define read_depth twice (the second definition
        shadowing the first); this single signature accepts both call shapes.
        """
        return None

    def _load_rgb(self, img_path):
        """Load an image as a tensor; a 4-channel image is alpha-blended onto white."""
        with Image.open(img_path) as pil_img:
            img = self.transform(pil_img)  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    @staticmethod
    def _rescaled_c2w(intrinsic, extrinsic, scale_mat):
        """Return the cam2world pose of the projection K @ w2c @ scale_mat."""
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        return load_K_Rt_from_P(None, P)[1]

    def __getitem__(self, idx):
        """
        Assemble the sample of shape number ``idx``.

        Entry 0 of every per-view list is the reference (query) view; it is
        followed by the 32 source views. For any split other than 'train' the
        reference entry is stripped from the per-view tensors before returning.
        Camera data of the current shape is also stored on ``self`` as a side
        effect.
        """
        sample = {}
        # Leftover of a variant with 8 reference views per shape: after the
        # multiplication the reference index below is always 0.
        idx = idx * 8  # to be deleted
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj-mats between views

        folder_path = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]

        # last subdir name
        shape_name = os.path.split(folder_path)[-1]

        pose_file = os.path.join(folder_path, '32_random', 'views.npz')
        # Close the archive once both arrays have been read.
        with np.load(pose_file) as pose_array:
            elevations = pose_array['elevations']
            azimuths = pose_array['azimuths']
        view_poses = calc_pose(elevations, azimuths, 32)  # [32, 3, 4] c2ws

        self.img_wh = (256, 256)
        self.input_poses = np.array(view_poses)
        # Append the homogeneous [0, 0, 0, 1] row to every 3x4 pose.
        bottom_rows = np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :],
                              [self.input_poses.shape[0], 1, 1])
        self.input_poses = np.concatenate([self.input_poses, bottom_rows], axis=1)
        self.define_transforms()
        # Negates the camera y and z axes (Blender -> OpenCV convention, per the name).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for image_dix in range(view_poses.shape[0]):
            c2w = self.input_poses[image_dix] @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # target view; every extrinsic below is right-multiplied by w2c_ref_inv
        # so that poses are relative to this reference camera
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
        img = self._load_rgb(os.path.join(folder_path, '32_random', f'{idx}.png'))
        imgs += [img]

        # No ground-truth depth/mask is available: depth is filled with -1, mask with 1.
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        img_wh = self.img_wh

        src_views = range(0, 8 * 4)

        for vid in src_views:
            # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
            img = self._load_rgb(os.path.join(folder_path, '32_random', f'{vid}.png'))

            imgs += [img]
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            c2w = self._rescaled_c2w(intrinsic, extrinsic, scale_mat)
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far: camera distance to the origin -/+ 1, with a 5% margin
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        # print(new_near_fars)
        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside of training the reference view is not part of the inputs.
        start_idx = 0 if self.split == 'train' else 1

        # Rescaled world-to-camera matrices of the first 8 views.
        new_target_w2cs = []
        for i_idx in range(8):
            c2w = self._rescaled_c2w(self.all_intrinsics[i_idx],
                                     self.all_extrinsics[i_idx] @ w2c_ref_inv,
                                     scale_mat)
            new_target_w2cs.append(np.linalg.inv(c2w))
        target_w2cs = np.stack(new_target_w2cs)

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        # sample['light_idx'] = torch.tensor(light_idx)
        sample['scan'] = shape_name

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
        # print("meta: ", sample['meta'])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # Drop the reference entry (start_idx == 1) from the per-view inputs.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_4_4.py
ADDED
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width
        focal: focal lengths, either a pair indexable as (focal[0], focal[1])
            for the x and y axes, or a single scalar used for both axes
        center: optional principal point (cx, cy); defaults to (W / 2, H / 2)

    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # create_meshgrid yields a 1xHxWx2 grid; [0] drops the leading axis (HxWx2).
    # NOTE: +0.5 is applied here, i.e. rays go through pixel centres. (An older
    # comment claimed the opposite; see https://github.com/bmild/nerf/issues/24
    # for the background discussion.)
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    i, j = grid.unbind(-1)

    # Accept a scalar focal length as well as an (fx, fy) pair.
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        fx = fy = focal

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack(
        [(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1
    )  # (H, W, 3)

    return directions
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines, the first one is skipped.
        P: optional projection matrix passed straight to
            ``cv2.decomposeProjectionMatrix``.

    Outputs:
        intrinsics: 4x4 matrix whose upper-left 3x3 is K normalised by K[2, 2]
        pose: 4x4 float32 cam2world matrix
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the rotation recovered from P (world -> camera, per the OpenCV docs),
    # so its transpose is the camera -> world rotation stored in the pose.
    pose[:3, :3] = R.transpose()
    # t is homogeneous (4x1); divide by the last component to de-homogenise.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
    """Evaluation dataset yielding one reference view plus source views per shape.

    Every sub-folder of ``<root_dir>/<specific_dataset_name>`` is one shape. A
    shape folder is expected to contain ``pose.json`` (keys ``c2ws``,
    ``intrinsics``, ``near_far``) and the image folders ``stage1_8`` (reference
    views) and ``stage2_8`` (source views). All cameras of a sample are
    expressed relative to the reference camera and rescaled by ``scale_mat``.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[],
                 specific_dataset_name='GSO'
                 ):
        """Index the shape folders below ``root_dir/specific_dataset_name``.

        ``split_filepath`` and ``pair_filepath`` are accepted for interface
        compatibility but are not used by this class. ``vol_dims`` and
        ``test_ref_views`` are only read, never mutated.
        """
        import warnings

        self.root_dir = root_dir
        self.split = split
        self.specific_dataset_name = specific_dataset_name
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() scales by this factor; it used to be dropped here, which
        # made read_mask() fail with AttributeError.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # The former `assert self.split == 'val' or 'export_mesh'` was always
        # true ('export_mesh' is a truthy string). The check is real now, but
        # only warns: __getitem__ still has explicit 'train'/'test' branches
        # and existing callers may rely on them.
        if self.split not in ('val', 'export_mesh'):
            warnings.warn(
                f"only support val or export_mesh (got split={self.split!r})",
                stacklevel=2,
            )

        # find all subfolders
        main_folder = os.path.join(root_dir, self.specific_dataset_name)
        self.shape_list = sorted(os.listdir(main_folder))
        self.lvis_paths = [os.path.join(main_folder, shape_name) for shape_name in self.shape_list]

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> tensor transform stored in ``self.transform``."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the ``all_*`` lists with one entry per id in ``self.img_ids``.

        The intrinsic and near/far entries are the shared ``self.intrinsic`` /
        ``self.near_far`` objects; the extrinsic is the inverse of the view's c2w.
        """
        for vid, _ in enumerate(self.img_ids):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return ``(mask, mask_h)`` as 0/1 arrays.

        ``mask_h`` is the image resized by ``self.downSample``; ``mask`` is
        ``mask_h`` further resized by 0.25. Raises FileNotFoundError when the
        file cannot be read.
        """
        mask_h = cv2.imread(filename, 0)
        if mask_h is None:
            # cv2.imread signals failure by returning None; fail with a clear message.
            raise FileNotFoundError(f"cannot read mask image: {filename}")
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the normalisation matrix from the bounding sphere of the views.

        Returns ``(scale_mat, scale_factor)``: a float32 4x4 matrix with
        ``radius * factor`` on the diagonal and the centre as translation, and
        ``1 / (radius * factor)``.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Number of shapes (one sample per shape folder)."""
        return len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder: no depth maps are read by this dataset; returns None."""
        # NOTE: an earlier `read_depth(self, filename)` definition in this class
        # was shadowed by this one and has been removed.
        pass

    def _load_rgb(self, path):
        """Load an image as a tensor, compositing RGBA onto a white background."""
        # Context manager closes the file handle once the pixels are in the tensor.
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    @staticmethod
    def _normalized_pose(intrinsic, extrinsic, scale_mat):
        """Return ``(w2c, c2w)`` of a camera after applying ``scale_mat``."""
        P = (intrinsic @ extrinsic @ scale_mat)[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        return np.linalg.inv(c2w), c2w

    def __getitem__(self, idx):
        """Assemble the sample dict for shape ``idx``.

        Side effect: per-shape camera data (``img_ids``, ``c2ws``, ``w2cs``,
        ``intrinsic``, ``near_far``, ``all_*`` ...) is cached on ``self``.
        """
        sample = {}
        # Legacy indexing kept as-is: the index is scaled by 8 ("to be deleted"
        # in the original), so the reference view below is always view 0 and
        # sample['origin_idx'] equals idx * 8.
        origin_idx = idx * 8
        folder_path = self.lvis_paths[origin_idx // 8]
        idx = origin_idx % 8  # [0, 7]

        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj-mats between views

        # last subdir name
        shape_name = os.path.split(folder_path)[-1]

        pose_json_path = os.path.join(folder_path, "pose.json")
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8
        self.define_transforms()
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        # Convert every stored pose with the blender2opencv axis flip.
        c2w_list = [self.input_poses[i] @ self.blender2opencv for i in range(len(self.img_ids))]
        self.c2ws = np.stack(c2w_list, axis=0)
        self.w2cs = np.stack([np.linalg.inv(c2w) for c2w in c2w_list], axis=0)
        self.near_fars = [self.near_far] * len(self.img_ids)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # target (reference) view: all extrinsics are re-expressed relative to it
        w2c_ref = np.linalg.inv(self.c2ws[idx])
        w2c_ref_inv = np.linalg.inv(w2c_ref)
        w2cs.append(w2c_ref @ w2c_ref_inv)

        img = self._load_rgb(os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}'))
        imgs.append(img)

        # No ground-truth depth/mask is available: depth is filled with -1 for
        # the reference view and the mask is all ones.
        depths_h.append(np.full(tuple(img.shape[1:]), -1.0, dtype=np.float32))
        masks_h.append(np.ones(tuple(img.shape[1:]), dtype=np.int32))

        intrinsics.append(self.intrinsic)
        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None
        img_wh = self.img_wh

        # Source views: ids 8..39, keeping only those with (vid // 4) even,
        # i.e. 16 of the 32 candidates.
        src_views = [vid for vid in range(8, 8 + 8 * 4) if (vid // 4) % 2 == 0]

        for vid in src_views:
            img = self._load_rgb(os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}'))
            imgs.append(img)
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])
            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            w2c, c2w = self._normalized_pose(intrinsic, extrinsic, scale_mat)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # near/far are placed one unit either side of the camera's distance
            # to the origin, then padded by 5%.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside training the reference view is dropped from the source stack.
        start_idx = 0 if self.split == 'train' else 1

        # Candidate target poses: the first 8 views, normalised like the others.
        target_w2cs = np.stack([
            self._normalized_pose(
                self.all_intrinsics[i_idx],
                self.all_extrinsics[i_idx] @ w2c_ref_inv,
                scale_mat,
            )[0]
            for i_idx in range(8)
        ])

        # Only the views that were actually loaded, so that view_ids lines up
        # one-to-one with images / w2cs / intrinsics (previously all 32
        # candidate ids were listed although half of them are skipped).
        view_ids = [idx] + src_views
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = shape_name

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])

        # - image to render (always the reference view, taken before slicing)
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # near_fars and target_candidate_w2cs are intentionally left unsliced,
        # as in the original implementation.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays: the full image for val/test, random rays otherwise
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_6_4.py
ADDED
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Inputs:
        H, W: image height and width
        focal: focal lengths, either a pair indexable as (focal[0], focal[1])
            for the x and y axes, or a single scalar used for both axes
        center: optional principal point (cx, cy); defaults to (W / 2, H / 2)

    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
    """
    # create_meshgrid yields a 1xHxWx2 grid; [0] drops the leading axis (HxWx2).
    # NOTE: +0.5 is applied here, i.e. rays go through pixel centres. (An older
    # comment claimed the opposite; see https://github.com/bmild/nerf/issues/24
    # for the background discussion.)
    grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    i, j = grid.unbind(-1)

    # Accept a scalar focal length as well as an (fx, fy) pair.
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        fx = fy = focal

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack(
        [(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1
    )  # (H, W, 3)

    return directions
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: path of a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines, the first one is skipped.
        P: optional projection matrix passed straight to
            ``cv2.decomposeProjectionMatrix``.

    Outputs:
        intrinsics: 4x4 matrix whose upper-left 3x3 is K normalised by K[2, 2]
        pose: 4x4 float32 cam2world matrix
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the rotation recovered from P (world -> camera, per the OpenCV docs),
    # so its transpose is the camera -> world rotation stored in the pose.
    pose[:3, :3] = R.transpose()
    # t is homogeneous (4x1); divide by the last component to de-homogenise.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
|
65 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
|
66 |
+
split_filepath=None, pair_filepath=None,
|
67 |
+
N_rays=512,
|
68 |
+
vol_dims=[128, 128, 128], batch_size=1,
|
69 |
+
clean_image=False, importance_sample=False, test_ref_views=[],
|
70 |
+
specific_dataset_name = 'GSO'
|
71 |
+
):
|
72 |
+
|
73 |
+
# print("root_dir: ", root_dir)
|
74 |
+
self.root_dir = root_dir
|
75 |
+
self.split = split
|
76 |
+
# self.specific_dataset_name = 'Realfusion'
|
77 |
+
# self.specific_dataset_name = 'GSO'
|
78 |
+
# self.specific_dataset_name = 'Objaverse'
|
79 |
+
# self.specific_dataset_name = 'Zero123'
|
80 |
+
|
81 |
+
self.specific_dataset_name = specific_dataset_name
|
82 |
+
self.n_views = n_views
|
83 |
+
self.N_rays = N_rays
|
84 |
+
self.batch_size = batch_size # - used for construct new metas for gru fusion training
|
85 |
+
|
86 |
+
self.clean_image = clean_image
|
87 |
+
self.importance_sample = importance_sample
|
88 |
+
self.test_ref_views = test_ref_views # used for testing
|
89 |
+
self.scale_factor = 1.0
|
90 |
+
self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
|
91 |
+
assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
|
92 |
+
# find all subfolders
|
93 |
+
main_folder = os.path.join(root_dir, self.specific_dataset_name)
|
94 |
+
self.shape_list = os.listdir(main_folder)
|
95 |
+
self.shape_list.sort()
|
96 |
+
|
97 |
+
# self.shape_list = ['barrel_render']
|
98 |
+
# self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
|
99 |
+
|
100 |
+
|
101 |
+
self.lvis_paths = []
|
102 |
+
for shape_name in self.shape_list:
|
103 |
+
self.lvis_paths.append(os.path.join(main_folder, shape_name))
|
104 |
+
|
105 |
+
# print("lvis_paths: ", self.lvis_paths)
|
106 |
+
|
107 |
+
if img_wh is not None:
|
108 |
+
assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
|
109 |
+
'img_wh must both be multiples of 32!'
|
110 |
+
|
111 |
+
|
112 |
+
# * bounding box for rendering
|
113 |
+
self.bbox_min = np.array([-1.0, -1.0, -1.0])
|
114 |
+
self.bbox_max = np.array([1.0, 1.0, 1.0])
|
115 |
+
|
116 |
+
# - used for cost volume regularization
|
117 |
+
self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
|
118 |
+
self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
119 |
+
|
120 |
+
|
121 |
+
def define_transforms(self):
    """Store the image-to-tensor pipeline (a lone ``ToTensor``) on ``self.transform``."""
    pipeline = [T.ToTensor()]
    self.transform = T.Compose(pipeline)
|
123 |
+
|
124 |
+
|
125 |
+
|
126 |
+
def load_cam_info(self):
|
127 |
+
for vid, img_id in enumerate(self.img_ids):
|
128 |
+
intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
|
129 |
+
self.all_intrinsics.append(intrinsic)
|
130 |
+
self.all_extrinsics.append(extrinsic)
|
131 |
+
self.all_near_fars.append(near_far)
|
132 |
+
|
133 |
+
def read_depth(self, filename):
    """Placeholder that reads nothing and returns None.

    NOTE: a later ``read_depth(self, filename, near_bound, noisy_factor=1.0)``
    definition in this class replaces this one at class-creation time.
    """
    return None
|
135 |
+
|
136 |
+
def read_mask(self, filename):
    """Read a mask image and return ``(mask, mask_h)`` as 0/1 arrays.

    ``mask_h`` is the image resized by ``self.downSample`` (1.0 when the
    attribute is not set on the instance); ``mask`` is ``mask_h`` further
    resized by 0.25. Both use nearest-neighbour interpolation.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an opaque error inside cv2.resize.
        raise FileNotFoundError(f"cannot read mask image: {filename}")

    # The visible __init__ accepts downSample but never stores it, so fall
    # back to "no rescaling" rather than raising AttributeError.
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
147 |
+
|
148 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """Build the normalisation matrix from the bounding sphere of the views.

    Returns ``(scale_mat, scale_factor)``: a float32 4x4 matrix with
    ``radius * factor`` on the diagonal and the centre as translation, and
    the reciprocal ``1 / (radius * factor)``.
    """
    center, sphere_radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    scaled_radius = sphere_radius * factor

    diagonal = [scaled_radius, scaled_radius, scaled_radius, 1.0]
    matrix = np.diag(diagonal)
    matrix[:3, 3] = center.cpu().numpy()

    inverse_scale = 1. / scaled_radius.cpu().numpy()
    return matrix.astype(np.float32), inverse_scale
|
158 |
+
|
159 |
+
def __len__(self):
|
160 |
+
# return 8*len(self.lvis_paths)
|
161 |
+
return len(self.lvis_paths)
|
162 |
+
|
163 |
+
|
164 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
|
165 |
+
pass
|
166 |
+
|
167 |
+
|
168 |
+
def __getitem__(self, idx):
|
169 |
+
sample = {}
|
170 |
+
idx = idx * 8 # to be deleted
|
171 |
+
origin_idx = idx
|
172 |
+
imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
|
173 |
+
intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj-mats between views
|
174 |
+
|
175 |
+
folder_path = self.lvis_paths[idx//8]
|
176 |
+
idx = idx % 8 # [0, 7]
|
177 |
+
|
178 |
+
# last subdir name
|
179 |
+
shape_name = os.path.split(folder_path)[-1]
|
180 |
+
|
181 |
+
pose_json_path = os.path.join(folder_path, "pose.json")
|
182 |
+
with open(pose_json_path, 'r') as f:
|
183 |
+
meta = json.load(f)
|
184 |
+
|
185 |
+
self.img_ids = list(meta["c2ws"].keys()) # e.g. "view_0", "view_7", "view_0_2_10"
|
186 |
+
self.img_wh = (256, 256)
|
187 |
+
self.input_poses = np.array(list(meta["c2ws"].values()))
|
188 |
+
intrinsic = np.eye(4)
|
189 |
+
intrinsic[:3, :3] = np.array(meta["intrinsics"])
|
190 |
+
self.intrinsic = intrinsic
|
191 |
+
self.near_far = np.array(meta["near_far"])
|
192 |
+
self.near_far[1] = 1.8
|
193 |
+
self.define_transforms()
|
194 |
+
self.blender2opencv = np.array(
|
195 |
+
[[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
|
196 |
+
)
|
197 |
+
|
198 |
+
self.c2ws = []
|
199 |
+
self.w2cs = []
|
200 |
+
self.near_fars = []
|
201 |
+
# self.root_dir = root_dir
|
202 |
+
for image_dix, img_id in enumerate(self.img_ids):
|
203 |
+
pose = self.input_poses[image_dix]
|
204 |
+
c2w = pose @ self.blender2opencv
|
205 |
+
self.c2ws.append(c2w)
|
206 |
+
self.w2cs.append(np.linalg.inv(c2w))
|
207 |
+
self.near_fars.append(self.near_far)
|
208 |
+
self.c2ws = np.stack(self.c2ws, axis=0)
|
209 |
+
self.w2cs = np.stack(self.w2cs, axis=0)
|
210 |
+
|
211 |
+
|
212 |
+
self.all_intrinsics = [] # the cam info of the whole scene
|
213 |
+
self.all_extrinsics = []
|
214 |
+
self.all_near_fars = []
|
215 |
+
self.load_cam_info()
|
216 |
+
|
217 |
+
|
218 |
+
# target view
|
219 |
+
c2w = self.c2ws[idx]
|
220 |
+
w2c = np.linalg.inv(c2w)
|
221 |
+
w2c_ref = w2c
|
222 |
+
w2c_ref_inv = np.linalg.inv(w2c_ref)
|
223 |
+
|
224 |
+
w2cs.append(w2c @ w2c_ref_inv)
|
225 |
+
c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
|
226 |
+
|
227 |
+
# img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
|
228 |
+
img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')
|
229 |
+
|
230 |
+
img = Image.open(img_filename)
|
231 |
+
img = self.transform(img) # (4, h, w)
|
232 |
+
|
233 |
+
|
234 |
+
if img.shape[0] == 4:
|
235 |
+
img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
|
236 |
+
imgs += [img]
|
237 |
+
|
238 |
+
|
239 |
+
depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
|
240 |
+
depth_h = depth_h.fill_(-1.0)
|
241 |
+
mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)
|
242 |
+
|
243 |
+
|
244 |
+
depths_h.append(depth_h)
|
245 |
+
masks_h.append(mask_h)
|
246 |
+
|
247 |
+
intrinsic = self.intrinsic
|
248 |
+
intrinsics.append(intrinsic)
|
249 |
+
|
250 |
+
|
251 |
+
near_fars.append(self.near_fars[idx])
|
252 |
+
image_perm = 0 # only supervised on reference view
|
253 |
+
|
254 |
+
mask_dilated = None
|
255 |
+
|
256 |
+
|
257 |
+
src_views = range(8, 8 + 8 * 4)
|
258 |
+
|
259 |
+
for vid in src_views:
|
260 |
+
if ((vid - 8) // 4 == 4) or ((vid - 8) // 4 == 6):
|
261 |
+
continue
|
262 |
+
|
263 |
+
# img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
|
264 |
+
img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
|
265 |
+
img = Image.open(img_filename)
|
266 |
+
img_wh = self.img_wh
|
267 |
+
|
268 |
+
img = self.transform(img)
|
269 |
+
if img.shape[0] == 4:
|
270 |
+
img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
|
271 |
+
|
272 |
+
imgs += [img]
|
273 |
+
depth_h = np.ones(img.shape[1:], dtype=np.float32)
|
274 |
+
depths_h.append(depth_h)
|
275 |
+
masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
|
276 |
+
|
277 |
+
near_fars.append(self.all_near_fars[vid])
|
278 |
+
intrinsics.append(self.all_intrinsics[vid])
|
279 |
+
|
280 |
+
w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
|
281 |
+
|
282 |
+
|
283 |
+
# ! estimate scale_mat
|
284 |
+
scale_mat, scale_factor = self.cal_scale_mat(
|
285 |
+
img_hw=[img_wh[1], img_wh[0]],
|
286 |
+
intrinsics=intrinsics, extrinsics=w2cs,
|
287 |
+
near_fars=near_fars, factor=1.1
|
288 |
+
)
|
289 |
+
|
290 |
+
|
291 |
+
new_near_fars = []
|
292 |
+
new_w2cs = []
|
293 |
+
new_c2ws = []
|
294 |
+
new_affine_mats = []
|
295 |
+
new_depths_h = []
|
296 |
+
for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
|
297 |
+
|
298 |
+
P = intrinsic @ extrinsic @ scale_mat
|
299 |
+
P = P[:3, :4]
|
300 |
+
# - should use load_K_Rt_from_P() to obtain c2w
|
301 |
+
c2w = load_K_Rt_from_P(None, P)[1]
|
302 |
+
w2c = np.linalg.inv(c2w)
|
303 |
+
new_w2cs.append(w2c)
|
304 |
+
new_c2ws.append(c2w)
|
305 |
+
affine_mat = np.eye(4)
|
306 |
+
affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
|
307 |
+
new_affine_mats.append(affine_mat)
|
308 |
+
|
309 |
+
camera_o = c2w[:3, 3]
|
310 |
+
dist = np.sqrt(np.sum(camera_o ** 2))
|
311 |
+
near = dist - 1
|
312 |
+
far = dist + 1
|
313 |
+
|
314 |
+
new_near_fars.append([0.95 * near, 1.05 * far])
|
315 |
+
new_depths_h.append(depth * scale_factor)
|
316 |
+
|
317 |
+
# print(new_near_fars)
|
318 |
+
imgs = torch.stack(imgs).float()
|
319 |
+
depths_h = np.stack(new_depths_h)
|
320 |
+
masks_h = np.stack(masks_h)
|
321 |
+
|
322 |
+
affine_mats = np.stack(new_affine_mats)
|
323 |
+
intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
|
324 |
+
new_near_fars)
|
325 |
+
|
326 |
+
if self.split == 'train':
|
327 |
+
start_idx = 0
|
328 |
+
else:
|
329 |
+
start_idx = 1
|
330 |
+
|
331 |
+
|
332 |
+
|
333 |
+
target_w2cs = []
|
334 |
+
target_intrinsics = []
|
335 |
+
new_target_w2cs = []
|
336 |
+
for i_idx in range(8):
|
337 |
+
target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
|
338 |
+
target_intrinsics.append(self.all_intrinsics[i_idx])
|
339 |
+
|
340 |
+
for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):
|
341 |
+
|
342 |
+
P = intrinsic @ extrinsic @ scale_mat
|
343 |
+
P = P[:3, :4]
|
344 |
+
# - should use load_K_Rt_from_P() to obtain c2w
|
345 |
+
c2w = load_K_Rt_from_P(None, P)[1]
|
346 |
+
w2c = np.linalg.inv(c2w)
|
347 |
+
new_target_w2cs.append(w2c)
|
348 |
+
target_w2cs = np.stack(new_target_w2cs)
|
349 |
+
|
350 |
+
|
351 |
+
|
352 |
+
view_ids = [idx] + list(src_views)
|
353 |
+
sample['origin_idx'] = origin_idx
|
354 |
+
sample['images'] = imgs # (V, 3, H, W)
|
355 |
+
sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
|
356 |
+
sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
|
357 |
+
sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
|
358 |
+
sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
|
359 |
+
sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32)) # (8, 4, 4)
|
360 |
+
sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
|
361 |
+
sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
|
362 |
+
sample['view_ids'] = torch.from_numpy(np.array(view_ids))
|
363 |
+
sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
|
364 |
+
|
365 |
+
# sample['light_idx'] = torch.tensor(light_idx)
|
366 |
+
sample['scan'] = shape_name
|
367 |
+
|
368 |
+
sample['scale_factor'] = torch.tensor(scale_factor)
|
369 |
+
sample['img_wh'] = torch.from_numpy(np.array(img_wh))
|
370 |
+
sample['render_img_idx'] = torch.tensor(image_perm)
|
371 |
+
sample['partial_vol_origin'] = self.partial_vol_origin
|
372 |
+
sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
|
373 |
+
# print("meta: ", sample['meta'])
|
374 |
+
|
375 |
+
# - image to render
|
376 |
+
sample['query_image'] = sample['images'][0]
|
377 |
+
sample['query_c2w'] = sample['c2ws'][0]
|
378 |
+
sample['query_w2c'] = sample['w2cs'][0]
|
379 |
+
sample['query_intrinsic'] = sample['intrinsics'][0]
|
380 |
+
sample['query_depth'] = sample['depths_h'][0]
|
381 |
+
sample['query_mask'] = sample['masks_h'][0]
|
382 |
+
sample['query_near_far'] = sample['near_fars'][0]
|
383 |
+
|
384 |
+
sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
|
385 |
+
sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
|
386 |
+
sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
|
387 |
+
sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
|
388 |
+
sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
|
389 |
+
sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
|
390 |
+
sample['view_ids'] = sample['view_ids'][start_idx:]
|
391 |
+
sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
|
392 |
+
|
393 |
+
sample['scale_mat'] = torch.from_numpy(scale_mat)
|
394 |
+
sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
|
395 |
+
|
396 |
+
# - generate rays
|
397 |
+
if ('val' in self.split) or ('test' in self.split):
|
398 |
+
sample_rays = gen_rays_from_single_image(
|
399 |
+
img_wh[1], img_wh[0],
|
400 |
+
sample['query_image'],
|
401 |
+
sample['query_intrinsic'],
|
402 |
+
sample['query_c2w'],
|
403 |
+
depth=sample['query_depth'],
|
404 |
+
mask=sample['query_mask'] if self.clean_image else None)
|
405 |
+
else:
|
406 |
+
sample_rays = gen_random_rays_from_single_image(
|
407 |
+
img_wh[1], img_wh[0],
|
408 |
+
self.N_rays,
|
409 |
+
sample['query_image'],
|
410 |
+
sample['query_intrinsic'],
|
411 |
+
sample['query_c2w'],
|
412 |
+
depth=sample['query_depth'],
|
413 |
+
mask=sample['query_mask'] if self.clean_image else None,
|
414 |
+
dilated_mask=mask_dilated,
|
415 |
+
importance_sample=self.importance_sample)
|
416 |
+
|
417 |
+
|
418 |
+
sample['rays'] = sample_rays
|
419 |
+
|
420 |
+
return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_3.py
ADDED
@@ -0,0 +1,428 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions expressed in the camera frame.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset and focal[1]
               divides the y offset
        center: optional indexable (cx, cy); when None, (W / 2, H / 2) is used
    Outputs:
        tensor stacking (x, y, 1) along the last axis for every pixel
        (documented by the original author as (H, W, 3))
    """
    # Un-normalized pixel coordinates, shifted by half a pixel. The leading
    # axis returned by create_meshgrid is dropped with [0].
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    xs, ys = pixel_grid.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_dir = (xs - cx) / focal[0]
    y_dir = (ys - cy) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera pose.

    Args:
        filename: path of a text file holding the matrix as space-separated
            rows; only read when ``P`` is None. When the file has exactly
            four lines the first one is skipped.
        P: optional projection matrix handed to
            ``cv2.decomposeProjectionMatrix``.

    Returns:
        (intrinsics, pose): ``intrinsics`` is a 4x4 matrix whose upper-left
        3x3 is K normalised by ``K[2, 2]``; ``pose`` is a 4x4 float32
        cam2world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # (the previous bare open().read() leaked it).
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation, so its transpose is the
    # camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is homogeneous; dividing by its last component gives the camera centre.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
    """Evaluation dataset: one reference view plus a fixed set of source views.

    Every sub-folder of ``<root_dir>/<specific_dataset_name>`` is one item.
    ``__getitem__`` reads ``pose.json`` from that folder, loads the reference
    image from ``stage1_8`` and the source images from ``stage2_8``, and
    expresses all cameras relative to the reference camera.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[],
                 specific_dataset_name='GSO'
                 ):
        """Index the shape folders and store the sampling configuration.

        Args:
            root_dir: directory containing the ``specific_dataset_name`` folder.
            split: must be ``'val'`` or ``'export_mesh'``.
            n_views: stored on the instance.
            img_wh: optional (width, height); both must be multiples of 32.
            downSample: resize factor used by ``read_mask``.
            split_filepath, pair_filepath: accepted for interface
                compatibility; not used here.
            N_rays: number of rays requested from the random ray sampler.
            vol_dims: voxel grid dimensions, stored as a float tensor.
            batch_size: stored on the instance.
            clean_image: when true, the query mask is passed to the ray
                generators.
            importance_sample: forwarded to the random ray sampler.
            test_ref_views: stored on the instance.
            specific_dataset_name: sub-folder of ``root_dir`` to enumerate.

        Raises:
            AssertionError: on an unsupported split or an ``img_wh`` that is
                not a multiple of 32.
        """
        self.root_dir = root_dir
        self.split = split

        self.specific_dataset_name = specific_dataset_name
        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() reads self.downSample; it was previously never stored.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
        # The previous form `split == 'val' or 'export_mesh'` was always true
        # because the non-empty string literal is truthy.
        assert self.split in ('val', 'export_mesh'), 'only support val or export_mesh'

        # find all subfolders
        main_folder = os.path.join(root_dir, self.specific_dataset_name)
        self.shape_list = sorted(os.listdir(main_folder))
        self.lvis_paths = [os.path.join(main_folder, shape_name)
                           for shape_name in self.shape_list]

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Install the image-to-tensor pipeline on ``self.transform``."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera extrinsics and near/far per view.

        Reads ``self.img_ids``, ``self.c2ws``, ``self.intrinsic`` and
        ``self.near_far``; appends to ``self.all_intrinsics``,
        ``self.all_extrinsics`` and ``self.all_near_fars``.
        """
        for vid, _ in enumerate(self.img_ids):
            extrinsic = np.linalg.inv(self.c2ws[vid])
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(extrinsic)
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return binarised (quarter-res, full-res) copies.

        The file is read as grayscale, resized by ``self.downSample`` and then
        by a further 0.25, both with nearest-neighbour interpolation.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Compute the matrix that maps the scene bounding sphere to unit scale.

        Returns:
            (scale_mat, scale_factor): a float32 4x4 matrix with
            ``radius * factor`` on the diagonal and the bounding-box centre as
            translation, and the reciprocal of that scaled radius.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Return the number of shape folders (one item per shape)."""
        return len(self.lvis_paths)

    # NOTE: an earlier `read_depth(self, filename)` stub used to precede this
    # definition; it was shadowed by this one and has been removed.
    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder: depth maps are not loaded by this dataset."""
        pass

    def _load_rgb(self, img_path):
        """Load an image as a tensor, blending any alpha channel onto ones."""
        # Closing the PIL handle right after conversion avoids leaking file
        # descriptors across many __getitem__ calls.
        with Image.open(img_path) as pil_img:
            img = self.transform(pil_img)  # (C, h, w)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    @staticmethod
    def _rescaled_c2w(intrinsic, extrinsic, scale_mat):
        """Return the cam2world pose of ``intrinsic @ extrinsic @ scale_mat``."""
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        return load_K_Rt_from_P(None, P)[1]

    def __getitem__(self, idx):
        """Assemble the sample dictionary for shape number ``idx``.

        The reference view is always the first entry of ``pose.json``; the
        source views are entries ``8 + 4 * i + {0, 2, 3}`` for ``i`` in 0..7.
        All extrinsics are expressed relative to the reference camera and then
        rescaled with the matrix from ``cal_scale_mat``.
        """
        sample = {}
        idx = idx * 8  # to be deleted
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, near_fars = [], [], []  # record proj-mats between views

        folder_path = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]

        # last subdir name
        shape_name = os.path.split(folder_path)[-1]

        pose_json_path = os.path.join(folder_path, "pose.json")
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        # Force a float array: with integer JSON values the assignment of 1.8
        # below would otherwise be truncated to 1.
        self.near_far = np.array(meta["near_far"], dtype=np.float64)
        self.near_far[1] = 1.8
        self.define_transforms()
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for image_dix, img_id in enumerate(self.img_ids):
            pose = self.input_poses[image_dix]
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        w2cs.append(w2c @ w2c_ref_inv)

        img = self._load_rgb(os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}'))
        imgs += [img]

        # No ground-truth depth for the reference view: fill with -1.
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # Fixed choice of 3 out of every group of 4 second-stage views.
        src_views = []
        for i in range(8):
            src_views += [8 + i * 4 + local_idx for local_idx in (0, 2, 3)]

        img_wh = self.img_wh
        for vid in src_views:
            img = self._load_rgb(os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}'))

            imgs += [img]
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            c2w = self._rescaled_c2w(intrinsic, extrinsic, scale_mat)
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far planes bracket a unit-radius region around the origin
            # as seen from the rescaled camera position.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # Outside training, the reference (query) view is dropped from the
        # per-view tensors below.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        # Candidate target poses: the first 8 views, relative to the reference
        # camera and rescaled like the others.
        new_target_w2cs = []
        for i_idx in range(8):
            target_c2w = self._rescaled_c2w(
                self.all_intrinsics[i_idx],
                self.all_extrinsics[i_idx] @ w2c_ref_inv,
                scale_mat)
            new_target_w2cs.append(np.linalg.inv(target_c2w))
        target_w2cs = np.stack(new_target_w2cs)

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = shape_name

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # 'near_fars' and 'target_candidate_w2cs' are intentionally not sliced,
        # matching the original behaviour.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_8_wide.py
ADDED
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute camera-frame ray directions for every pixel of an H x W image.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset and focal[1]
               divides the y offset
        center: optional indexable (cx, cy); when None, (W / 2, H / 2) is used
    Outputs:
        tensor stacking (x, y, 1) along the last axis for every pixel
        (documented by the original author as (H, W, 3))
    """
    # Un-normalized pixel coordinates, shifted by half a pixel. The leading
    # axis returned by create_meshgrid is dropped with [0].
    coords = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    u, v = coords.unbind(-1)

    principal = [W / 2, H / 2] if center is None else center

    components = [
        (u - principal[0]) / focal[0],
        (v - principal[1]) / focal[1],
        torch.ones_like(u),
    ]
    return torch.stack(components, -1)
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera pose.

    Args:
        filename: path of a text file holding the matrix as space-separated
            rows; only read when ``P`` is None. When the file has exactly
            four lines the first one is skipped.
        P: optional projection matrix handed to
            ``cv2.decomposeProjectionMatrix``.

    Returns:
        (intrinsics, pose): ``intrinsics`` is a 4x4 matrix whose upper-left
        3x3 is K normalised by ``K[2, 2]``; ``pose`` is a 4x4 float32
        cam2world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # (the previous bare open().read() leaked it).
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is the world-to-camera rotation, so its transpose is the
    # camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is homogeneous; dividing by its last component gives the camera centre.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
|
65 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
|
66 |
+
split_filepath=None, pair_filepath=None,
|
67 |
+
N_rays=512,
|
68 |
+
vol_dims=[128, 128, 128], batch_size=1,
|
69 |
+
clean_image=False, importance_sample=False, test_ref_views=[],
|
70 |
+
specific_dataset_name = 'GSO'
|
71 |
+
):
|
72 |
+
|
73 |
+
# print("root_dir: ", root_dir)
|
74 |
+
self.root_dir = root_dir
|
75 |
+
self.split = split
|
76 |
+
# self.specific_dataset_name = 'Realfusion'
|
77 |
+
# self.specific_dataset_name = 'GSO'
|
78 |
+
# self.specific_dataset_name = 'Objaverse'
|
79 |
+
# self.specific_dataset_name = 'Zero123'
|
80 |
+
|
81 |
+
self.specific_dataset_name = specific_dataset_name
|
82 |
+
self.n_views = n_views
|
83 |
+
self.N_rays = N_rays
|
84 |
+
self.batch_size = batch_size # - used for construct new metas for gru fusion training
|
85 |
+
|
86 |
+
self.clean_image = clean_image
|
87 |
+
self.importance_sample = importance_sample
|
88 |
+
self.test_ref_views = test_ref_views # used for testing
|
89 |
+
self.scale_factor = 1.0
|
90 |
+
self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
|
91 |
+
assert self.split == 'val' or 'export_mesh', 'only support val or export_mesh'
|
92 |
+
# find all subfolders
|
93 |
+
main_folder = os.path.join(root_dir, self.specific_dataset_name)
|
94 |
+
self.shape_list = os.listdir(main_folder)
|
95 |
+
self.shape_list.sort()
|
96 |
+
|
97 |
+
# self.shape_list = ['barrel_render']
|
98 |
+
# self.shape_list = ["barrel", "bag", "mailbox", "shoe", "chair", "car", "dog", "teddy"] # TO BE DELETED
|
99 |
+
|
100 |
+
|
101 |
+
self.lvis_paths = []
|
102 |
+
for shape_name in self.shape_list:
|
103 |
+
self.lvis_paths.append(os.path.join(main_folder, shape_name))
|
104 |
+
|
105 |
+
# print("lvis_paths: ", self.lvis_paths)
|
106 |
+
|
107 |
+
if img_wh is not None:
|
108 |
+
assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
|
109 |
+
'img_wh must both be multiples of 32!'
|
110 |
+
|
111 |
+
|
112 |
+
# * bounding box for rendering
|
113 |
+
self.bbox_min = np.array([-1.0, -1.0, -1.0])
|
114 |
+
self.bbox_max = np.array([1.0, 1.0, 1.0])
|
115 |
+
|
116 |
+
# - used for cost volume regularization
|
117 |
+
self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
|
118 |
+
self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
119 |
+
|
120 |
+
|
121 |
+
def define_transforms(self):
    """Install the image preprocessing pipeline on ``self.transform``."""
    steps = [T.ToTensor()]
    self.transform = T.Compose(steps)
|
123 |
+
|
124 |
+
|
125 |
+
|
126 |
+
def load_cam_info(self):
    """Record per-view camera parameters for the whole scene.

    For every entry of ``self.img_ids`` this appends the shared
    ``self.intrinsic``, the inverse of ``self.c2ws[vid]`` and the shared
    ``self.near_far`` to ``self.all_intrinsics``, ``self.all_extrinsics``
    and ``self.all_near_fars`` respectively.
    """
    for view_index, _ in enumerate(self.img_ids):
        shared_intrinsic = self.intrinsic
        world_to_cam = np.linalg.inv(self.c2ws[view_index])
        shared_near_far = self.near_far

        self.all_intrinsics.append(shared_intrinsic)
        self.all_extrinsics.append(world_to_cam)
        self.all_near_fars.append(shared_near_far)
|
132 |
+
|
133 |
+
def read_depth(self, filename):
    """Placeholder that loads nothing and returns None.

    NOTE(review): a second ``read_depth`` with a different signature is
    defined later in the same class and replaces this one at class creation.
    """
    return None
|
135 |
+
|
136 |
+
def read_mask(self, filename):
    """Read a mask image and return binarised (quarter-res, full-res) copies.

    The file is read as grayscale, resized by ``self.downSample`` and then by
    a further factor of 0.25, both with nearest-neighbour interpolation.
    Every non-zero pixel is set to 1 in both results.
    """
    nearest = cv2.INTER_NEAREST

    raw = cv2.imread(filename, 0)
    full_res = cv2.resize(raw, None, fx=self.downSample, fy=self.downSample,
                          interpolation=nearest)
    quarter_res = cv2.resize(full_res, None, fx=0.25, fy=0.25,
                             interpolation=nearest)

    # the masks stored in png are not binary
    for m in (quarter_res, full_res):
        m[m > 0] = 1

    return quarter_res, full_res
|
147 |
+
|
148 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """Build the scale matrix derived from the scene bounding box.

    Returns:
        (scale_mat, scale_factor): a float32 4x4 matrix carrying
        ``radius * factor`` on its first three diagonal entries and the
        bounding-box centre in its last column, plus the reciprocal of that
        scaled radius.
    """
    center, radius, _bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

    scaled_radius = radius * factor

    mat = np.diag([scaled_radius, scaled_radius, scaled_radius, 1.0])
    mat[:3, 3] = center.cpu().numpy()
    mat = mat.astype(np.float32)

    inverse_radius = 1. / scaled_radius.cpu().numpy()
    return mat, inverse_radius
|
158 |
+
|
159 |
+
def __len__(self):
|
160 |
+
# return 8*len(self.lvis_paths)
|
161 |
+
return len(self.lvis_paths)
|
162 |
+
|
163 |
+
|
164 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """Placeholder: accepts the depth-loading arguments and returns None."""
    return None
|
166 |
+
|
167 |
+
|
168 |
+
def __getitem__(self, idx):
    """Assemble one sample: a reference (query) view plus eight source views.

    The method reads ``pose.json`` from the shape folder selected by ``idx``,
    rebuilds all per-scene camera state on ``self`` (``img_ids``, ``c2ws``,
    ``w2cs``, ``all_*`` lists, ...), loads the images from ``stage1_8``,
    expresses every camera relative to the reference camera, normalises the
    scene with ``cal_scale_mat`` and finally generates rays for the query view.

    Returns:
        dict with per-view tensors (``images``, ``depths_h``, ``masks_h``,
        ``w2cs``, ``c2ws``, ``intrinsics``, ``affine_mats``, ``view_ids``),
        the ``query_*`` entries for the reference view, ``scale_mat``,
        ``trans_mat``, ``rays`` and bookkeeping fields.
    """
    sample = {}
    # With the multiplication below, ``idx // 8`` recovers the incoming index
    # and ``idx % 8`` is always 0, so the reference view is always view 0.
    idx = idx * 8  # to be deleted
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj-mats between views

    folder_path = self.lvis_paths[idx//8]
    idx = idx % 8  # [0, 7]

    # last subdir name
    shape_name = os.path.split(folder_path)[-1]

    pose_json_path = os.path.join(folder_path, "pose.json")
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)

    # Per-scene camera state is stored on the instance and overwritten on every call.
    self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
    self.img_wh = (256, 256)
    self.input_poses = np.array(list(meta["c2ws"].values()))
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])
    self.near_far[1] = 1.8  # the far bound from the json is overridden
    self.define_transforms()
    # Flips the y and z camera axes (presumably Blender -> OpenCV convention, per the name).
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    # self.root_dir = root_dir
    for image_dix, img_id in enumerate(self.img_ids):
        pose = self.input_poses[image_dix]
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)
    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # target view
    c2w = self.c2ws[idx]
    w2c = np.linalg.inv(c2w)
    w2c_ref = w2c
    # Inverse of the reference w2c, i.e. the reference camera-to-world transform.
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    # Every camera is expressed relative to the reference camera, so the
    # reference view itself gets (numerically) the identity.
    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    # img_filename = os.path.join(folder_path, 'stage1_8_debug', f'{self.img_ids[idx]}')
    img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[idx]}')

    img = Image.open(img_filename)
    img = self.transform(img)  # (4, h, w)

    if img.shape[0] == 4:
        # Composite RGBA over a white background using the alpha channel.
        img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
    imgs += [img]

    # No depth or mask is loaded: the reference depth is a constant -1 map
    # and the mask is all ones.
    depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
    depth_h = depth_h.fill_(-1.0)
    mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

    depths_h.append(depth_h)
    masks_h.append(mask_h)

    intrinsic = self.intrinsic
    intrinsics.append(intrinsic)

    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None

    # The source views are views 0..7, which includes the reference view again.
    src_views = range(8)

    for vid in src_views:

        # img_filename = os.path.join(folder_path, 'stage2_8_debug', f'{self.img_ids[vid]}')
        # img_filename = os.path.join(folder_path, 'stage2_8', f'{self.img_ids[vid]}')
        img_filename = os.path.join(folder_path, 'stage1_8', f'{self.img_ids[vid]}')
        img = Image.open(img_filename)
        img_wh = self.img_wh

        img = self.transform(img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

        imgs += [img]
        # Source views get constant placeholder depth (ones) and all-ones masks.
        depth_h = np.ones(img.shape[1:], dtype=np.float32)
        depths_h.append(depth_h)
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])

        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    # ! estimate scale_mat
    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):

        # Fold scale_mat into the projection and decompose it again to get
        # the camera pose in the normalised space.
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # Near/far are recomputed from the camera's distance to the origin
        # (+/- 1, with a 5% margin); the loop variable ``near_far`` is not used.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1

        new_near_fars.append([0.95 * near, 1.05 * far])
        new_depths_h.append(depth * scale_factor)

    # print(new_near_fars)
    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
        new_near_fars)

    # For every split other than 'train', the leading reference entry is
    # dropped from the per-view tensors further below.
    if self.split == 'train':
        start_idx = 0
    else:
        start_idx = 1

    # Candidate target cameras: views 0..7, relative to the reference camera
    # and moved into the normalised space the same way as above.
    target_w2cs = []
    target_intrinsics = []
    new_target_w2cs = []
    for i_idx in range(8):
        target_w2cs.append(self.all_extrinsics[i_idx] @ w2c_ref_inv)
        target_intrinsics.append(self.all_intrinsics[i_idx])

    for intrinsic, extrinsic in zip(target_intrinsics, target_w2cs):

        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_target_w2cs.append(w2c)
    target_w2cs = np.stack(new_target_w2cs)

    view_ids = [idx] + list(src_views)
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    # sample['light_idx'] = torch.tensor(light_idx)
    sample['scan'] = shape_name

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])
    # print("meta: ", sample['meta'])

    # - image to render
    # The query_* entries are taken from index 0 (the reference view) before
    # the per-view tensors are sliced with start_idx.
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    # NOTE: 'near_fars' is not sliced here, unlike the other per-view entries.
    sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
    sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
    sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
    sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
    sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
    sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
    sample['view_ids'] = sample['view_ids'][start_idx:]
    sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    # trans_mat is the reference camera-to-world transform computed above.
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    # Splits whose name contains 'val' or 'test' use gen_rays_from_single_image;
    # every other split uses gen_random_rays_from_single_image with N_rays.
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_eval_new_data_temp.py
ADDED
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
|
18 |
+
|
19 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute a ray direction for every pixel, in camera coordinates.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] scales the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3) tensor of (x, y, 1) directions, not normalised
    """
    # Pixel coordinates are shifted by +0.5, i.e. sampled at pixel centres.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    u, v = pixel_grid.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_dir = (u - cx) / focal[0]
    y_dir = (v - cy) / focal[1]
    z_dir = torch.ones_like(u)
    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
38 |
+
|
39 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera-to-world pose.

    Args:
        filename: path of a text file with the projection matrix. Only read
            when ``P`` is None. Each line must hold at least four
            space-separated numbers; if the file has exactly four lines, the
            first one is skipped.
        P: optional projection matrix handed to
            ``cv2.decomposeProjectionMatrix``; takes precedence over ``filename``.

    Returns:
        ``(intrinsics, pose)``: a 4x4 matrix whose upper-left 3x3 block is the
        calibration matrix normalised so that ``K[2, 2] == 1``, and a 4x4
        float32 camera-to-world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically.
        with open(filename) as matrix_file:
            lines = matrix_file.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R comes back as the world-to-camera rotation (per the OpenCV docs); its
    # transpose is the inverse rotation, i.e. camera-to-world.
    pose[:3, :3] = R.transpose()
    # t is a 4x1 homogeneous vector; dividing by its last entry gives the
    # 3D position stored in the pose's translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
61 |
+
|
62 |
+
|
63 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
64 |
+
class BlenderPerView(Dataset):
    """Per-shape dataset: one reference image plus 32 source images per sample.

    Each sub-folder of ``<root_dir>/Objaverse_archived`` is one shape. For a
    shape, ``__getitem__`` loads the reference image ``stage1_8/0.png`` and
    the 32 source images ``stage2_8/{i}_{j}.png`` (i in 0..7, j in 0..3).
    Camera poses come from a fixed pose json shared by all shapes.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[],
                 specific_dataset_name='GSO'
                 ):
        """Index the shape folders and store the sampling configuration.

        Args:
            root_dir: directory that contains the dataset sub-folder.
            split: split name; 'val' and 'export_mesh' are the intended values,
                any other value triggers a warning.
            n_views, N_rays, batch_size, clean_image, importance_sample,
            test_ref_views: stored unchanged on the instance.
            img_wh: when not None, both entries must be multiples of 32.
            downSample: resize factor used by ``read_mask``.
            split_filepath, pair_filepath: accepted but unused.
            vol_dims: stored as the float tensor ``self.voxel_dims``.
            specific_dataset_name: accepted but currently ignored (see below).
        """
        import warnings  # local import: only needed for the split check below

        # print("root_dir: ", root_dir)
        self.root_dir = root_dir
        self.split = split
        # NOTE: the ``specific_dataset_name`` argument is ignored; the dataset
        # sub-folder is hard-coded to 'Objaverse_archived'.
        self.specific_dataset_name = 'Objaverse_archived'

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # Stored because read_mask() reads self.downSample.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # The original ``assert self.split == 'val' or 'export_mesh'`` was always
        # true because the non-empty string is truthy. Check membership for
        # real, but only warn so callers passing other splits keep working.
        if self.split not in ('val', 'export_mesh'):
            warnings.warn(
                f"BlenderPerView only supports split 'val' or 'export_mesh', got {self.split!r}"
            )

        # find all subfolders
        main_folder = os.path.join(root_dir, self.specific_dataset_name)
        self.shape_list = sorted(os.listdir(main_folder))
        self.lvis_paths = [os.path.join(main_folder, shape_name)
                           for shape_name in self.shape_list]

        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the image preprocessing pipeline (a single ``T.ToTensor()`` step)."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Append intrinsics, world-to-camera matrix and near/far for every view.

        The ``all_*`` lists are appended to, not reset; ``__getitem__`` clears
        them before calling this method.
        """
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Load a mask and return it binarised at quarter and full resolution.

        Returns:
            ``(mask, mask_h)``: the mask resized by a further 0.25 and the mask
            resized by ``self.downSample``; every non-zero pixel is set to 1.
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the scale/translation matrix from the views' bounding box.

        Returns:
            ``(scale_mat, scale_factor)``: a float32 4x4 matrix with the
            enlarged radius on the diagonal and the centre as translation,
            and the reciprocal of that radius.
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Return the number of shape folders."""
        # return 8*len(self.lvis_paths)
        return len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder depth loader; does nothing and returns ``None``.

        The class used to define a second, one-argument ``read_depth`` earlier
        in the body; it was overwritten by this definition and has been removed.
        """
        pass

    def _load_rgb(self, img_filename):
        """Load an image as a tensor, compositing RGBA over a white background."""
        img = self.transform(Image.open(img_filename))  # (4, h, w) for RGBA input
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def __getitem__(self, idx):
        """Assemble one sample: the reference (query) view plus 32 source views.

        Camera state is rebuilt on ``self`` from the pose json on every call.
        All cameras are expressed relative to the reference camera and then
        normalised with the matrix from ``cal_scale_mat``.

        Returns:
            dict with per-view tensors, the ``query_*`` entries for the
            reference view, ``scale_mat``, ``trans_mat``, ``rays`` and
            bookkeeping fields.
        """
        sample = {}
        # With this multiplication, ``idx // 8`` recovers the incoming index
        # and ``idx % 8`` is always 0, so the reference view is always view 0.
        idx = idx * 8  # to be deleted
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj-mats between views

        folder_path = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]

        # last subdir name
        shape_name = os.path.split(folder_path)[-1]

        # One fixed pose file is used for every shape.
        pose_json_path = '/objaverse-processed/zero12345_img/zero12345_narrow_pose.json'
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # the far bound from the json is overridden
        self.define_transforms()
        # Flips the y and z camera axes (presumably Blender -> OpenCV, per the name).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        # Inverse of the reference w2c, i.e. the reference camera-to-world transform.
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # The reference view relative to itself is (numerically) the identity.
        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img = self._load_rgb(os.path.join(folder_path, 'stage1_8', f'{idx}.png'))
        imgs += [img]

        # No depth or mask is loaded: the reference depth is a constant -1 map
        # and the mask is all ones.
        depth_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.float32)
        depth_h = depth_h.fill_(-1.0)
        mask_h = torch.ones((img.shape[1], img.shape[2]), dtype=torch.int32)

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # Pose entries 8..39 are the source views; their images are named
        # '<i>_<j>.png' with i = (vid - 8) // 4 and j = (vid - 8) % 4.
        src_views = range(8, 8 + 8 * 4)
        img_wh = self.img_wh

        for vid in src_views:
            img = self._load_rgb(
                os.path.join(folder_path, 'stage2_8', f'{(vid-8)//4}_{(vid-8)%4}.png'))

            imgs += [img]
            # Source views get constant placeholder depth (ones) and all-ones masks.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            # Fold scale_mat into the projection and decompose it again to get
            # the camera pose in the normalised space.
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near/far are recomputed from the camera's distance to the origin
            # (+/- 1, with a 5% margin) instead of the values from the json.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics = np.stack(intrinsics)
        w2cs = np.stack(new_w2cs)
        c2ws = np.stack(new_c2ws)
        near_fars = np.stack(new_near_fars)

        # For every split other than 'train', the leading reference entry is
        # dropped from the per-view tensors further below.
        start_idx = 0 if self.split == 'train' else 1

        # Candidate target cameras: pose entries 0..7, relative to the reference
        # camera and moved into the normalised space the same way as above.
        new_target_w2cs = []
        for i_idx in range(8):
            extrinsic = self.all_extrinsics[i_idx] @ w2c_ref_inv
            P = self.all_intrinsics[i_idx] @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            new_target_w2cs.append(np.linalg.inv(c2w))
        target_w2cs = np.stack(new_target_w2cs)

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['target_candidate_w2cs'] = torch.from_numpy(target_w2cs.astype(np.float32))  # (8, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = shape_name

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(self.specific_dataset_name) + '_' + str(shape_name) + "_refview" + str(view_ids[0])

        # - image to render
        # The query_* entries are taken from index 0 (the reference view)
        # before the per-view tensors are sliced with start_idx.
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # NOTE: 'near_fars' is not sliced, unlike the other per-view entries.
        for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                    'intrinsics', 'view_ids', 'affine_mats'):
            sample[key] = sample[key][start_idx:]

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        # trans_mat is the reference camera-to-world transform computed above.
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        query_mask = sample['query_mask'] if self.clean_image else None
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=query_mask)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=query_mask,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_no_depth.py
ADDED
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute a ray direction for every pixel, in camera coordinates.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] scales the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3) tensor of (x, y, 1) directions, not normalised
    """
    # Pixel coordinates are shifted by +0.5, i.e. sampled at pixel centres.
    pixel_grid = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    u, v = pixel_grid.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_dir = (u - cx) / focal[0]
    y_dir = (v - cy) / focal[1]
    z_dir = torch.ones_like(u)
    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: path to a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines, the first one is skipped.
        P: optional 3x4 projection matrix; when given, ``filename`` is ignored.
    Outputs:
        intrinsics: (4, 4) float64 matrix whose top-left 3x3 block is K,
            normalised so that K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # OpenCV returns the world-to-camera rotation; its transpose (= inverse)
    # is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is the homogeneous camera position; de-homogenise to get the
    # camera centre in world coordinates.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Dataset yielding one reference view plus 32 source views per sample.

    Every object listed in the LVIS split file contributes 8 samples, one per
    reference view ``view_0`` .. ``view_7``. All camera matrices in a sample
    are expressed relative to the reference camera and then rescaled by a
    per-sample ``scale_mat`` computed by ``get_boundingbox``.

    NOTE: in this variant the reference-view depth is not used for
    supervision; it is replaced by a constant -1 map. The depth PNG is only
    read to derive the foreground mask.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """Load the split list and camera metadata and precompute all poses.

        Inputs:
            root_dir: directory containing ``<folder_id>/<uid>/view_*.png``
            split: 'train' selects the train list, anything else the val list
            n_views: stored on the instance; not used by this class
            img_wh: only validated (multiples of 32); the working resolution
                is fixed to (256, 256)
            downSample: scale factor applied by ``read_mask``
            split_filepath, pair_filepath: accepted but unused
            N_rays: number of random rays sampled per training item
            vol_dims: cost-volume dimensions, stored as ``voxel_dims``
            batch_size: stored on the instance; not used by this class
            clean_image: pass the reference mask to the ray generator
            importance_sample: forwarded to the training ray generator
            test_ref_views: stored on the instance; not used by this class
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() depends on this attribute, so it has to be kept.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # NOTE: dataset locations are hard-coded absolute paths.
        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # override the far bound from the metadata
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> float tensor transform."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-view intrinsics / extrinsics (w2c) / near-far lists."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return it binarised at quarter and full size.

        Outputs:
            mask: the mask downscaled by a further factor of 4
            mask_h: the mask scaled by ``self.downSample``
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the matrix mapping the normalised volume to the scene bounds.

        Outputs:
            scale_mat: (4, 4) float32 matrix with ``radius * factor`` on the
                diagonal and the bounding-box centre as translation
            scale_factor: ``1 / (radius * factor)``
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Eight reference views per object."""
        return 8 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder; not implemented (returns None)."""
        pass

    @staticmethod
    def _composite_on_white(img):
        """Blend an RGBA (4, H, W) tensor onto white; other inputs pass through."""
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def _load_rgb(self, path):
        """Load an image file as a (3, H, W) float tensor on a white background."""
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)
        return self._composite_on_white(img)

    def __getitem__(self, idx):
        """Assemble the sample for dataset index ``idx``.

        ``idx // 8`` selects the object and ``idx % 8`` the reference view.
        Returns a dict of tensors (images, cameras, masks, rays, ...); for
        non-train splits the reference view is removed from the per-view
        tensors and only kept under the ``query_*`` keys.
        """
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # [0, 7]
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        img_wh = self.img_wh

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # Relative to itself the reference camera is (numerically) the identity.
        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))

        img = self._load_rgb(img_filename)  # (3, h, w)
        imgs += [img]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(depth_filename)
        # presumably millimetres -> metres, judging by the "_depth_mm" suffix
        mask_h = (depth_raw.astype(np.uint16) / 1000.0) > 0

        # Depth supervision is disabled here: use a constant -1 map.
        depth_h = np.full(tuple(img.shape[1:]), -1.0, dtype=np.float32)

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        src_views = range(8, 8 + 8 * 4)

        for vid in src_views:
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

            img = self._load_rgb(img_filename)
            imgs += [img]

            # Source views carry no depth / mask supervision: all-ones placeholders.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near / far bounds are placed at distance +/- 1 around the origin,
            # with a 5% margin on each side.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # For validation / test the reference view is excluded from the inputs.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [idx] + list(src_views)
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # NOTE: 'near_fars' is intentionally left unsliced, as in the original.
        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4.py
ADDED
@@ -0,0 +1,389 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Get ray directions for all pixels in camera coordinate.
    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems

    Rays are cast through pixel centres, i.e. pixel index + 0.5 on both axes.

    Inputs:
        H, W: image height and width
        focal: (fx, fy) pair of focal lengths in pixels; a single scalar is
            also accepted and is used for both axes
        center: optional (cx, cy) principal point in pixels; defaults to
            the image centre (W / 2, H / 2)
    Outputs:
        directions: (H, W, 3), the direction of the rays in camera coordinate
            (x to the right, y down, z = 1 along the viewing axis)
    """
    # Accept either an indexable (fx, fy) pair or a bare scalar focal length.
    try:
        fx, fy = focal[0], focal[1]
    except (TypeError, IndexError):
        fx = fy = focal

    # Pixel-centre coordinates: i holds the column (x), j the row (y), both (H, W).
    dtype = torch.get_default_dtype()
    xs = torch.arange(W, dtype=dtype) + 0.5
    ys = torch.arange(H, dtype=dtype) + 0.5
    i = xs.unsqueeze(0).expand(H, W)
    j = ys.unsqueeze(1).expand(H, W)

    cent = center if center is not None else [W / 2, H / 2]
    directions = torch.stack(
        [(i - cent[0]) / fx, (j - cent[1]) / fy, torch.ones_like(i)], -1
    )  # (H, W, 3)

    return directions
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    Inputs:
        filename: path to a text file holding the projection matrix as
            space-separated rows; only read when ``P`` is None. If the file
            has exactly four lines, the first one is skipped.
        P: optional 3x4 projection matrix; when given, ``filename`` is ignored.
    Outputs:
        intrinsics: (4, 4) float64 matrix whose top-left 3x3 block is K,
            normalised so that K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K, R, t = out[0], out[1], out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # OpenCV returns the world-to-camera rotation; its transpose (= inverse)
    # is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is the homogeneous camera position; de-homogenise to get the
    # camera centre in world coordinates.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
    """Dataset yielding one reference view plus 16 source views per sample.

    Every object listed in the LVIS split file contributes 4 samples, one per
    even-numbered reference view (``view_0``, ``view_2``, ``view_4``,
    ``view_6``). Source views are the ``view_<g>_<k>_10.png`` images of the
    even groups g in {0, 2, 4, 6}. All camera matrices in a sample are
    expressed relative to the reference camera and then rescaled by a
    per-sample ``scale_mat`` computed by ``get_boundingbox``.
    """

    def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
                 split_filepath=None, pair_filepath=None,
                 N_rays=512,
                 vol_dims=[128, 128, 128], batch_size=1,
                 clean_image=False, importance_sample=False, test_ref_views=[]):
        """Load the split list and camera metadata and precompute all poses.

        Inputs:
            root_dir: directory containing ``<folder_id>/<uid>/view_*.png``
            split: 'train' selects the train list, anything else the val list
            n_views: stored on the instance; not used by this class
            img_wh: only validated (multiples of 32); the working resolution
                is fixed to (256, 256)
            downSample: scale factor applied by ``read_mask``
            split_filepath, pair_filepath: accepted but unused
            N_rays: number of random rays sampled per training item
            vol_dims: cost-volume dimensions, stored as ``voxel_dims``
            batch_size: stored on the instance; not used by this class
            clean_image: pass the reference mask to the ray generator
            importance_sample: forwarded to the training ray generator
            test_ref_views: stored on the instance; not used by this class
        """
        self.root_dir = root_dir
        self.split = split

        self.n_views = n_views
        self.N_rays = N_rays
        self.batch_size = batch_size  # - used for construct new metas for gru fusion training
        # read_mask() depends on this attribute, so it has to be kept.
        self.downSample = downSample

        self.clean_image = clean_image
        self.importance_sample = importance_sample
        self.test_ref_views = test_ref_views  # used for testing
        self.scale_factor = 1.0
        self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

        # NOTE: dataset locations are hard-coded absolute paths.
        lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
        with open(lvis_json_path, 'r') as f:
            lvis_paths = json.load(f)
        if self.split == 'train':
            self.lvis_paths = lvis_paths['train']
        else:
            self.lvis_paths = lvis_paths['val']
        if img_wh is not None:
            assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
                'img_wh must both be multiples of 32!'

        pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
        with open(pose_json_path, 'r') as f:
            meta = json.load(f)

        self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
        self.img_wh = (256, 256)
        self.input_poses = np.array(list(meta["c2ws"].values()))
        intrinsic = np.eye(4)
        intrinsic[:3, :3] = np.array(meta["intrinsics"])
        self.intrinsic = intrinsic
        self.near_far = np.array(meta["near_far"])
        self.near_far[1] = 1.8  # override the far bound from the metadata
        self.define_transforms()
        # Flips the y and z axes (Blender camera convention -> OpenCV).
        self.blender2opencv = np.array(
            [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
        )

        self.c2ws = []
        self.w2cs = []
        self.near_fars = []
        for pose in self.input_poses:
            c2w = pose @ self.blender2opencv
            self.c2ws.append(c2w)
            self.w2cs.append(np.linalg.inv(c2w))
            self.near_fars.append(self.near_far)
        self.c2ws = np.stack(self.c2ws, axis=0)
        self.w2cs = np.stack(self.w2cs, axis=0)

        self.all_intrinsics = []  # the cam info of the whole scene
        self.all_extrinsics = []
        self.all_near_fars = []
        self.load_cam_info()

        # * bounding box for rendering
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])

        # - used for cost volume regularization
        self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
        self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)

    def define_transforms(self):
        """Create the PIL-image -> float tensor transform."""
        self.transform = T.Compose([T.ToTensor()])

    def load_cam_info(self):
        """Fill the per-view intrinsics / extrinsics (w2c) / near-far lists."""
        for vid in range(len(self.img_ids)):
            self.all_intrinsics.append(self.intrinsic)
            self.all_extrinsics.append(np.linalg.inv(self.c2ws[vid]))
            self.all_near_fars.append(self.near_far)

    def read_mask(self, filename):
        """Read a mask image and return it binarised at quarter and full size.

        Outputs:
            mask: the mask downscaled by a further factor of 4
            mask_h: the mask scaled by ``self.downSample``
        """
        mask_h = cv2.imread(filename, 0)
        mask_h = cv2.resize(mask_h, None, fx=self.downSample, fy=self.downSample,
                            interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                          interpolation=cv2.INTER_NEAREST)

        mask[mask > 0] = 1  # the masks stored in png are not binary
        mask_h[mask_h > 0] = 1

        return mask, mask_h

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the matrix mapping the normalised volume to the scene bounds.

        Outputs:
            scale_mat: (4, 4) float32 matrix with ``radius * factor`` on the
                diagonal and the bounding-box centre as translation
            scale_factor: ``1 / (radius * factor)``
        """
        center, radius, bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def __len__(self):
        """Four reference views (the even-numbered ones) per object."""
        return 4 * len(self.lvis_paths)

    def read_depth(self, filename, near_bound, noisy_factor=1.0):
        """Placeholder; not implemented (returns None)."""
        pass

    @staticmethod
    def _composite_on_white(img):
        """Blend an RGBA (4, H, W) tensor onto white; other inputs pass through."""
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
        return img

    def _load_rgb(self, path):
        """Load an image file as a (3, H, W) float tensor on a white background."""
        with Image.open(path) as pil_img:
            img = self.transform(pil_img)
        return self._composite_on_white(img)

    def __getitem__(self, idx):
        """Assemble the sample for dataset index ``idx``.

        The index is doubled first, so ``(2 * idx) // 8`` selects the object
        and ``(2 * idx) % 8`` the (even) reference view. Returns a dict of
        tensors (images, cameras, depths, masks, rays, ...); for non-train
        splits the reference view is removed from the per-view tensors and
        only kept under the ``query_*`` keys.
        """
        # Only every second reference view is used, hence the doubling.
        idx = idx * 2
        sample = {}
        origin_idx = idx
        imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
        intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

        folder_uid_dict = self.lvis_paths[idx // 8]
        idx = idx % 8  # one of 0, 2, 4, 6
        folder_id = folder_uid_dict['folder_id']
        uid = folder_uid_dict['uid']
        img_wh = self.img_wh

        # target view
        c2w = self.c2ws[idx]
        w2c = np.linalg.inv(c2w)
        w2c_ref = w2c
        w2c_ref_inv = np.linalg.inv(w2c_ref)

        # Relative to itself the reference camera is (numerically) the identity.
        w2cs.append(w2c @ w2c_ref_inv)
        c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

        img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{idx}.png')
        depth_filename = os.path.join(os.path.join(self.root_dir, folder_id, uid, f'view_{idx}_depth_mm.png'))

        img = self._load_rgb(img_filename)  # (3, h, w)
        imgs += [img]

        depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
        if depth_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(depth_filename)
        # presumably millimetres -> metres, judging by the "_depth_mm" suffix
        depth_h = depth_raw.astype(np.uint16) / 1000.0
        mask_h = depth_h > 0

        # Convert z-depth into distance along the ray: scale each unit-z ray
        # direction by the depth and take its length.
        directions = get_ray_directions(self.img_wh[1], self.img_wh[0],
                                        [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
        # Work in numpy explicitly instead of mixing a tensor with an ndarray.
        surface_points = directions.cpu().numpy() * depth_h[..., None]  # [H, W, 3]
        depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

        depths_h.append(depth_h)
        masks_h.append(mask_h)

        intrinsics.append(self.intrinsic)

        near_fars.append(self.near_fars[idx])
        image_perm = 0  # only supervised on reference view

        mask_dilated = None

        # Keep only the source views whose group index is even (16 of the 32),
        # so that 'view_ids' matches the views that are actually loaded.
        src_views = [vid for vid in range(8, 8 + 8 * 4) if (vid // 4) % 2 == 0]

        for vid in src_views:
            img_filename = os.path.join(self.root_dir, folder_id, uid, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

            img = self._load_rgb(img_filename)
            imgs += [img]

            # Source views carry no depth / mask supervision: all-ones placeholders.
            depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
            masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

            near_fars.append(self.all_near_fars[vid])
            intrinsics.append(self.all_intrinsics[vid])

            w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

        # ! estimate scale_mat
        scale_mat, scale_factor = self.cal_scale_mat(
            img_hw=[img_wh[1], img_wh[0]],
            intrinsics=intrinsics, extrinsics=w2cs,
            near_fars=near_fars, factor=1.1
        )

        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        new_depths_h = []
        for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
            P = intrinsic @ extrinsic @ scale_mat
            P = P[:3, :4]
            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # Near / far bounds are placed at distance +/- 1 around the origin,
            # with a 5% margin on each side.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])
            new_depths_h.append(depth * scale_factor)

        imgs = torch.stack(imgs).float()
        depths_h = np.stack(new_depths_h)
        masks_h = np.stack(masks_h)

        affine_mats = np.stack(new_affine_mats)
        intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
            new_near_fars)

        # For validation / test the reference view is excluded from the inputs.
        if self.split == 'train':
            start_idx = 0
        else:
            start_idx = 1

        view_ids = [idx] + src_views
        sample['origin_idx'] = origin_idx
        sample['images'] = imgs  # (V, 3, H, W)
        sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
        sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
        sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
        sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
        sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
        sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
        sample['view_ids'] = torch.from_numpy(np.array(view_ids))
        sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

        sample['scan'] = folder_id

        sample['scale_factor'] = torch.tensor(scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(img_wh))
        sample['render_img_idx'] = torch.tensor(image_perm)
        sample['partial_vol_origin'] = self.partial_vol_origin
        sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

        # - image to render
        sample['query_image'] = sample['images'][0]
        sample['query_c2w'] = sample['c2ws'][0]
        sample['query_w2c'] = sample['w2cs'][0]
        sample['query_intrinsic'] = sample['intrinsics'][0]
        sample['query_depth'] = sample['depths_h'][0]
        sample['query_mask'] = sample['masks_h'][0]
        sample['query_near_far'] = sample['near_fars'][0]

        # NOTE: 'near_fars' is intentionally left unsliced, as in the original.
        sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
        sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
        sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
        sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
        sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
        sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
        sample['view_ids'] = sample['view_ids'][start_idx:]
        sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

        sample['scale_mat'] = torch.from_numpy(scale_mat)
        sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

        # - generate rays
        if ('val' in self.split) or ('test' in self.split):
            sample_rays = gen_rays_from_single_image(
                img_wh[1], img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None)
        else:
            sample_rays = gen_random_rays_from_single_image(
                img_wh[1], img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=sample['query_depth'],
                mask=sample['query_mask'] if self.clean_image else None,
                dilated_mask=mask_dilated,
                importance_sample=self.importance_sample)

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/blender_general_narrow_all_only_4_and_4.py
ADDED
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Build the per-pixel ray directions in the camera coordinate frame.

    Reference: https://www.scratchapixel.com/lessons/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] divides the x offset, focal[1] the y offset
        center: optional indexable pair (cx, cy); defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), un-normalized ray directions with z fixed to 1
    """
    # Pixel grid with a +0.5 shift applied to both coordinates.
    # The leading batch axis of the meshgrid is dropped by [0] -> HxWx2.
    pixel_xy = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    xs, ys = pixel_xy.unbind(-1)

    if center is None:
        cx, cy = W / 2, H / 2
    else:
        cx, cy = center[0], center[1]

    x_dir = (xs - cx) / focal[0]
    y_dir = (ys - cy) / focal[1]
    z_dir = torch.ones_like(xs)

    return torch.stack([x_dir, y_dir, z_dir], -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: text file with one space-separated matrix row per line; only
                  read when P is None. A 4-line file has its first line dropped.
        P: optional projection matrix handed to cv2.decomposeProjectionMatrix
    Outputs:
        intrinsics: (4, 4) array whose upper-left 3x3 is K scaled so K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager closes the handle; the bare open().read() leaked it.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R from the decomposition is used as the world-to-camera rotation, so its
    # transpose (the inverse of a rotation) is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is homogeneous (4x1): de-homogenise to get the 3-vector translation.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
59 |
+
|
60 |
+
|
61 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
62 |
+
class BlenderPerView(Dataset):
|
63 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=[128, 128, 128], batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=[]):
    """
    Load the object split and the shared camera metadata for the dataset.

    Inputs:
        root_dir: directory containing <folder_id>/<uid>/ image folders
        split: 'train' selects the 'train' list of the split JSON, anything else 'val'
        n_views, N_rays, batch_size, clean_image, importance_sample,
        test_ref_views: stored on the instance unchanged
        img_wh: only validated (both multiples of 32); self.img_wh is fixed to (256, 256)
        downSample: rescale factor stored for read_mask()
        split_filepath, pair_filepath: accepted but unused here
        vol_dims: voxel grid dimensions, stored as a float tensor
    """
    # print("root_dir: ", root_dir)
    self.root_dir = root_dir
    self.split = split

    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # Fix: read_mask() reads self.downSample, which was never stored before.
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/lvis_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)

    self.img_ids = list(meta["c2ws"].keys())  # e.g. "view_0", "view_7", "view_0_2_10"
    self.img_wh = (256, 256)  # hard-coded; the img_wh argument is only validated above
    self.input_poses = np.array(list(meta["c2ws"].values()))
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])
    self.near_far[1] = 1.8  # far bound from the JSON is overridden
    self.define_transforms()
    # Flips the y and z axes of each stored pose (Blender -> OpenCV camera convention).
    self.blender2opencv = np.array(
        [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
    )

    self.c2ws = []
    self.w2cs = []
    self.near_fars = []
    for pose in self.input_poses:
        c2w = pose @ self.blender2opencv
        self.c2ws.append(c2w)
        self.w2cs.append(np.linalg.inv(c2w))
        self.near_fars.append(self.near_far)
    self.c2ws = np.stack(self.c2ws, axis=0)
    self.w2cs = np.stack(self.w2cs, axis=0)

    self.all_intrinsics = []  # the cam info of the whole scene
    self.all_extrinsics = []
    self.all_near_fars = []
    self.load_cam_info()

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
139 |
+
|
140 |
+
|
141 |
+
def define_transforms(self):
    """Set self.transform to a pipeline made of a single ToTensor step."""
    to_tensor = T.ToTensor()
    self.transform = T.Compose([to_tensor])
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
def load_cam_info(self):
|
147 |
+
for vid, img_id in enumerate(self.img_ids):
|
148 |
+
intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
|
149 |
+
self.all_intrinsics.append(intrinsic)
|
150 |
+
self.all_extrinsics.append(extrinsic)
|
151 |
+
self.all_near_fars.append(near_far)
|
152 |
+
|
153 |
+
def read_depth(self, filename):
    """
    Placeholder that does nothing and returns None.

    NOTE(review): the class defines read_depth a second time further down
    with a different signature; that later definition replaces this one.
    """
    return None
|
155 |
+
|
156 |
+
def read_mask(self, filename):
    """
    Load a mask image and return binary masks at two resolutions.

    Inputs:
        filename: path of the mask image, read as single-channel grayscale
    Outputs:
        mask: the rescaled mask downsampled by a further factor of 0.25
        mask_h: the mask rescaled by the dataset's downSample factor
        Both have every non-zero pixel set to 1.
    Raises:
        FileNotFoundError: if the image cannot be read
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f'cannot read mask image: {filename}')

    # __init__ in this file does not store downSample; fall back to its
    # default of 1.0 instead of failing with AttributeError.
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
167 |
+
|
168 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """
    Build the scale matrix from the bounding region returned by get_boundingbox.

    Inputs:
        img_hw, intrinsics, extrinsics, near_fars: forwarded to get_boundingbox
        factor: multiplier applied to the returned radius
    Outputs:
        scale_mat: (4, 4) float32 matrix, diag(r, r, r, 1) with the centre in
                   the last column, where r = radius * factor
        1 / r as a numpy value
    """
    center, radius, _bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

    scaled_radius = radius * factor
    scale_mat = np.diag([scaled_radius, scaled_radius, scaled_radius, 1.0])
    # center / radius expose .cpu(), i.e. they are presumably torch tensors
    scale_mat[:3, 3] = center.cpu().numpy()
    scale_mat = scale_mat.astype(np.float32)

    inverse_radius = 1. / scaled_radius.cpu().numpy()
    return scale_mat, inverse_radius
|
181 |
+
|
182 |
+
def __len__(self):
|
183 |
+
return 8*len(self.lvis_paths)
|
184 |
+
|
185 |
+
|
186 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """Placeholder that ignores its arguments, does nothing and returns None."""
    return None
|
188 |
+
|
189 |
+
|
190 |
+
def __getitem__(self, idx):
    """
    Build the sample for dataset index idx.

    idx // 8 selects the object entry in self.lvis_paths and idx % 8 selects
    which view_{k}.png is the reference (query) view. Source views are the
    view_{a}_{b}_10.png images whose id vid in [8, 40) satisfies
    (vid // 4) % 2 == idx % 2, i.e. 16 of the 32 candidates. Extrinsics are
    re-expressed relative to the reference camera and combined with the scale
    matrix from cal_scale_mat.

    Outputs:
        sample: dict holding the stacked images, depths, masks, camera matrices,
                near/far bounds, view ids, the query_* entries of the reference
                view, scale_mat, trans_mat and the generated rays
    Raises:
        FileNotFoundError: if the reference depth map cannot be read
    """
    sample = {}
    origin_idx = idx
    imgs, depths_h, masks_h = [], [], []  # full size (256, 256)
    intrinsics, w2cs, c2ws, near_fars = [], [], [], []  # record proj mats between views

    folder_uid_dict = self.lvis_paths[idx // 8]
    idx = idx % 8  # [0, 7]
    folder_id = folder_uid_dict['folder_id']
    uid = folder_uid_dict['uid']
    object_dir = os.path.join(self.root_dir, folder_id, uid)
    img_wh = self.img_wh

    # target view: its camera frame becomes the working frame, so its own
    # relative extrinsic is numerically the identity
    c2w = self.c2ws[idx]
    w2c = np.linalg.inv(c2w)
    w2c_ref_inv = np.linalg.inv(w2c)

    w2cs.append(w2c @ w2c_ref_inv)
    c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))

    # Context manager closes the image file once it has been converted.
    with Image.open(os.path.join(object_dir, f'view_{idx}.png')) as pil_img:
        img = self.transform(pil_img)  # (4, h, w) when the PNG carries alpha
    if img.shape[0] == 4:
        img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB
    imgs += [img]

    depth_filename = os.path.join(object_dir, f'view_{idx}_depth_mm.png')
    depth_raw = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED)
    if depth_raw is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f'cannot read depth map: {depth_filename}')
    # presumably millimetres on disk (file suffix "_depth_mm") -> divide by 1000
    depth_h = depth_raw.astype(np.uint16) / 1000.0
    mask_h = depth_h > 0
    # Scale each pixel's ray direction by its depth and take the norm, so the
    # stored value becomes the distance along the ray.
    directions = get_ray_directions(img_wh[1], img_wh[0],
                                    [self.intrinsic[0, 0], self.intrinsic[1, 1]])  # [H, W, 3]
    surface_points = directions * depth_h[..., None]  # [H, W, 3]
    depth_h = np.linalg.norm(surface_points, axis=-1)  # [H, W]

    depths_h.append(depth_h)
    masks_h.append(mask_h)
    intrinsics.append(self.intrinsic)
    near_fars.append(self.near_fars[idx])
    image_perm = 0  # only supervised on reference view

    mask_dilated = None

    src_views = range(8, 8 + 8 * 4)

    vid_list = []
    for vid in src_views:
        # keep only source views whose group parity matches the reference view
        if (vid // 4) % 2 != idx % 2:
            continue
        vid_list.append(vid)
        img_filename = os.path.join(object_dir, f'view_{(vid - 8) // 4}_{vid%4}_10.png')

        with Image.open(img_filename) as pil_img:
            img = self.transform(pil_img)
        if img.shape[0] == 4:
            img = img[:3] * img[-1:] + (1 - img[-1:])  # blend A to RGB

        imgs += [img]
        # source views get all-ones placeholder depth and mask
        depths_h.append(np.ones(img.shape[1:], dtype=np.float32))
        masks_h.append(np.ones(img.shape[1:], dtype=np.int32))

        near_fars.append(self.all_near_fars[vid])
        intrinsics.append(self.all_intrinsics[vid])

        w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)

    # ! estimate scale_mat
    scale_mat, scale_factor = self.cal_scale_mat(
        img_hw=[img_wh[1], img_wh[0]],
        intrinsics=intrinsics, extrinsics=w2cs,
        near_fars=near_fars, factor=1.1
    )

    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # near/far: camera distance to the origin -/+ 1, widened by 5%
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1

        new_near_fars.append([0.95 * near, 1.05 * far])
        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics = np.stack(intrinsics)
    w2cs = np.stack(new_w2cs)
    c2ws = np.stack(new_c2ws)
    near_fars = np.stack(new_near_fars)

    # training keeps the reference view among the inputs; otherwise it is dropped
    start_idx = 0 if self.split == 'train' else 1

    # Fix: list only the source views actually loaded (vid_list), so view_ids
    # has one entry per image instead of one per candidate in src_views.
    view_ids = [idx] + vid_list
    sample['origin_idx'] = origin_idx
    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    sample['scan'] = folder_id

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(np.array(img_wh))
    sample['render_img_idx'] = torch.tensor(image_perm)
    sample['partial_vol_origin'] = self.partial_vol_origin
    sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])

    # - image to render (always the reference view, taken before slicing)
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    for key in ('images', 'depths_h', 'masks_h', 'w2cs', 'c2ws',
                'intrinsics', 'view_ids', 'affine_mats'):
        sample[key] = sample[key][start_idx:]

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays: every pixel for val/test, a random subset otherwise
    query_mask = sample['query_mask'] if self.clean_image else None
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=query_mask)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=query_mask,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/blender_gt_32.py
ADDED
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
import json
|
13 |
+
from termcolor import colored
|
14 |
+
import imageio
|
15 |
+
from kornia import create_meshgrid
|
16 |
+
import open3d as o3d
|
17 |
+
def get_ray_directions(H, W, focal, center=None):
    """
    Compute the ray direction of every pixel in the camera coordinate frame.

    Reference: https://www.scratchapixel.com/lessions/3d-basic-rendering/
               ray-tracing-generating-camera-rays/standard-coordinate-systems
    Inputs:
        H, W: image height and width
        focal: indexable pair; focal[0] is applied to x, focal[1] to y
        center: optional indexable pair (cx, cy); defaults to [W / 2, H / 2]
    Outputs:
        directions: (H, W, 3), un-normalized ray directions with z fixed to 1
    """
    # [0] drops the meshgrid's leading batch axis (HxWx2); every coordinate
    # is then shifted by +0.5.
    coords = create_meshgrid(H, W, normalized_coordinates=False)[0] + 0.5
    col, row = coords.unbind(-1)

    principal = [W / 2, H / 2] if center is None else center

    components = [
        (col - principal[0]) / focal[0],
        (row - principal[1]) / focal[1],
        torch.ones_like(col),
    ]
    return torch.stack(components, -1)  # (H, W, 3)
|
36 |
+
|
37 |
+
import os, json
|
38 |
+
import numpy as np
|
39 |
+
def calc_pose(phis, thetas, size, radius = 1.2):
    """
    Build look-at poses for cameras placed on a sphere around the origin.

    Inputs:
        phis: angles in radians; the camera height is radius * cos(phi)
        thetas: angles in radians giving the rotation about the z axis
        size: number of poses; must match the number of camera centres
        radius: distance of every camera from the origin
        NOTE(review): the caller in this file passes 'elevations' as phis and
        'azimuths' as thetas; confirm the angle convention of that data.
    Outputs:
        poses: (size, 3, 4) float tensor; columns 0-2 are the right, up and
               forward axes, column 3 is the camera centre
    Raises:
        ValueError: if size differs from the number of camera centres
    """
    def normalize(vectors):
        return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10)

    # Flatten to 1-D so a single pose (size == 1) keeps its batch axis; the
    # previous squeeze(0) dropped it and broke the stack below.
    thetas = torch.as_tensor(thetas, dtype=torch.float32).reshape(-1)
    phis = torch.as_tensor(phis, dtype=torch.float32).reshape(-1)

    centers = torch.stack([
        radius * torch.sin(thetas) * torch.sin(phis),
        -radius * torch.cos(thetas) * torch.sin(phis),
        radius * torch.cos(phis),
    ], dim=-1)  # [B, 3]
    if centers.shape[0] != size:
        raise ValueError(
            f'size={size} does not match the {centers.shape[0]} camera centres')

    # lookat
    forward_vector = normalize(centers)
    up_vector = torch.tensor([0.0, 0.0, 1.0]).expand(size, 3)
    right_vector = normalize(torch.cross(up_vector, forward_vector, dim=-1))
    # Forward parallel to the z axis gives a zero cross product. Test each
    # pose on its own (the old check summed over the whole batch, so one valid
    # pose hid every degenerate one) and fall back to the y axis.
    degenerate = right_vector.pow(2).sum(dim=-1, keepdim=True) < 0.01
    fallback_right = torch.tensor([0.0, 1.0, 0.0]).expand(size, 3)
    right_vector = torch.where(degenerate, fallback_right, right_vector)
    up_vector = normalize(torch.cross(forward_vector, right_vector, dim=-1))

    poses = torch.eye(4, dtype=torch.float)[:3].unsqueeze(0).repeat(size, 1, 1)
    poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
    poses[:, :3, 3] = centers
    return poses
|
65 |
+
|
66 |
+
def load_K_Rt_from_P(filename, P=None):
    """
    Decompose a projection matrix into intrinsics and a camera-to-world pose.

    Inputs:
        filename: text file with one space-separated matrix row per line; only
                  read when P is None. A 4-line file has its first line dropped.
        P: optional projection matrix handed to cv2.decomposeProjectionMatrix
    Outputs:
        intrinsics: (4, 4) array whose upper-left 3x3 is K scaled so K[2, 2] == 1
        pose: (4, 4) float32 cam2world matrix
    """
    if P is None:
        # Context manager closes the handle; the bare open().read() leaked it.
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R from the decomposition is used as the world-to-camera rotation, so its
    # transpose (the inverse of a rotation) is the camera-to-world rotation.
    pose[:3, :3] = R.transpose()
    # t is homogeneous (4x1): de-homogenise to get the 3-vector translation.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
88 |
+
|
89 |
+
|
90 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
91 |
+
class BlenderPerView(Dataset):
|
92 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(256, 256), downSample=1.0,
             split_filepath=None, pair_filepath=None,
             N_rays=512,
             vol_dims=[128, 128, 128], batch_size=1,
             clean_image=False, importance_sample=False, test_ref_views=[]):
    """
    Load the object split and the shared intrinsics / near-far bounds.

    Inputs:
        root_dir: stored on the instance
        split: 'train' selects the 'train' list of the split JSON, anything else 'val'
        n_views, N_rays, batch_size, clean_image, importance_sample,
        test_ref_views: stored on the instance unchanged
        img_wh: only validated here (both values must be multiples of 32)
        downSample: rescale factor stored for read_mask()
        split_filepath, pair_filepath: accepted but unused here
        vol_dims: voxel grid dimensions, stored as a float tensor
    """
    # print("root_dir: ", root_dir)
    self.root_dir = root_dir
    self.split = split

    self.n_views = n_views
    self.N_rays = N_rays
    self.batch_size = batch_size  # - used for construct new metas for gru fusion training
    # Fix: read_mask() reads self.downSample, which was never stored before.
    self.downSample = downSample

    self.clean_image = clean_image
    self.importance_sample = importance_sample
    self.test_ref_views = test_ref_views  # used for testing
    self.scale_factor = 1.0
    self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))

    lvis_json_path = '/objaverse-processed/zero12345_img/random32_split.json'  # folder_id and uid
    with open(lvis_json_path, 'r') as f:
        lvis_paths = json.load(f)
    if self.split == 'train':
        self.lvis_paths = lvis_paths['train']
    else:
        self.lvis_paths = lvis_paths['val']
    if img_wh is not None:
        assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
            'img_wh must both be multiples of 32!'

    # Only the intrinsics and near/far bounds are taken from this pose file.
    pose_json_path = "/objaverse-processed/zero12345_img/zero12345_narrow_pose.json"
    with open(pose_json_path, 'r') as f:
        meta = json.load(f)
    intrinsic = np.eye(4)
    intrinsic[:3, :3] = np.array(meta["intrinsics"])
    self.intrinsic = intrinsic
    self.near_far = np.array(meta["near_far"])
    self.near_far[1] = 1.8  # far bound from the JSON is overridden

    # * bounding box for rendering
    self.bbox_min = np.array([-1.0, -1.0, -1.0])
    self.bbox_max = np.array([1.0, 1.0, 1.0])

    # - used for cost volume regularization
    self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
    self.partial_vol_origin = torch.tensor([-1., -1., -1.], dtype=torch.float32)
|
140 |
+
|
141 |
+
|
142 |
+
def define_transforms(self):
    """Set self.transform to a pipeline made of a single ToTensor step."""
    steps = [T.ToTensor()]
    self.transform = T.Compose(steps)
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
def load_cam_info(self):
|
148 |
+
for vid in range(self.input_poses.shape[0]):
|
149 |
+
intrinsic, extrinsic, near_far = self.intrinsic, np.linalg.inv(self.c2ws[vid]), self.near_far
|
150 |
+
self.all_intrinsics.append(intrinsic)
|
151 |
+
self.all_extrinsics.append(extrinsic)
|
152 |
+
self.all_near_fars.append(near_far)
|
153 |
+
|
154 |
+
def read_depth(self, filename):
    """
    Placeholder that does nothing and returns None.

    NOTE(review): the class defines read_depth a second time further down
    with a different signature; that later definition replaces this one.
    """
    return None
|
156 |
+
|
157 |
+
def read_mask(self, filename):
    """
    Load a mask image and return binary masks at two resolutions.

    Inputs:
        filename: path of the mask image, read as single-channel grayscale
    Outputs:
        mask: the rescaled mask downsampled by a further factor of 0.25
        mask_h: the mask rescaled by the dataset's downSample factor
        Both have every non-zero pixel set to 1.
    Raises:
        FileNotFoundError: if the image cannot be read
    """
    mask_h = cv2.imread(filename, 0)
    if mask_h is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f'cannot read mask image: {filename}')

    # __init__ in this file does not store downSample; fall back to its
    # default of 1.0 instead of failing with AttributeError.
    down_sample = getattr(self, 'downSample', 1.0)
    mask_h = cv2.resize(mask_h, None, fx=down_sample, fy=down_sample,
                        interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25,
                      interpolation=cv2.INTER_NEAREST)

    mask[mask > 0] = 1  # the masks stored in png are not binary
    mask_h[mask_h > 0] = 1

    return mask, mask_h
|
168 |
+
|
169 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """
    Build the scale matrix from the bounding region returned by get_boundingbox.

    Inputs:
        img_hw, intrinsics, extrinsics, near_fars: forwarded to get_boundingbox
        factor: multiplier applied to the returned radius
    Outputs:
        scale_mat: (4, 4) float32 matrix, diag(r, r, r, 1) with the centre in
                   the last column, where r = radius * factor
        1 / r as a numpy value
    """
    center, radius, _bounds = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)

    grown = radius * factor
    # center / radius expose .cpu(), i.e. they are presumably torch tensors
    scale_mat = np.diag([grown, grown, grown, 1.0])
    scale_mat[:3, 3] = center.cpu().numpy()
    scale_mat = scale_mat.astype(np.float32)

    reciprocal = 1. / grown.cpu().numpy()
    return scale_mat, reciprocal
|
182 |
+
|
183 |
+
def __len__(self):
|
184 |
+
return 32*len(self.lvis_paths)
|
185 |
+
|
186 |
+
|
187 |
+
def read_depth(self, filename, near_bound, noisy_factor=1.0):
    """Placeholder that ignores its arguments, does nothing and returns None."""
    return None
|
189 |
+
|
190 |
+
|
191 |
+
def __getitem__(self, idx):
|
192 |
+
sample = {}
|
193 |
+
origin_idx = idx
|
194 |
+
imgs, depths_h, masks_h = [], [], [] # full size (256, 256)
|
195 |
+
intrinsics, w2cs, c2ws, near_fars = [], [], [], [] # record proj mats between views
|
196 |
+
|
197 |
+
|
198 |
+
folder_uid_dict = self.lvis_paths[idx//32]
|
199 |
+
idx = idx % 32 # [0, 7]
|
200 |
+
folder_id = folder_uid_dict['folder_id']
|
201 |
+
uid = folder_uid_dict['uid']
|
202 |
+
|
203 |
+
pose_file = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, 'views.npz')
|
204 |
+
pose_array = np.load(pose_file)
|
205 |
+
pose = calc_pose(pose_array['elevations'], pose_array['azimuths'], 32) # [32, 3, 4] c2ws
|
206 |
+
|
207 |
+
self.img_wh = (256, 256)
|
208 |
+
self.input_poses = np.array(pose)
|
209 |
+
self.input_poses = np.concatenate([self.input_poses, np.tile(np.array([0, 0, 0, 1], dtype=np.float32)[None, None, :], [self.input_poses.shape[0], 1, 1])], axis=1)
|
210 |
+
self.define_transforms()
|
211 |
+
self.blender2opencv = np.array(
|
212 |
+
[[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]
|
213 |
+
)
|
214 |
+
|
215 |
+
self.c2ws = []
|
216 |
+
self.w2cs = []
|
217 |
+
self.near_fars = []
|
218 |
+
# self.root_dir = root_dir
|
219 |
+
for image_dix in range(pose.shape[0]):
|
220 |
+
pose = self.input_poses[image_dix]
|
221 |
+
c2w = pose @ self.blender2opencv
|
222 |
+
self.c2ws.append(c2w)
|
223 |
+
self.w2cs.append(np.linalg.inv(c2w))
|
224 |
+
self.near_fars.append(self.near_far)
|
225 |
+
self.c2ws = np.stack(self.c2ws, axis=0)
|
226 |
+
self.w2cs = np.stack(self.w2cs, axis=0)
|
227 |
+
|
228 |
+
|
229 |
+
self.all_intrinsics = [] # the cam info of the whole scene
|
230 |
+
self.all_extrinsics = []
|
231 |
+
self.all_near_fars = []
|
232 |
+
self.load_cam_info()
|
233 |
+
|
234 |
+
|
235 |
+
|
236 |
+
# target view
|
237 |
+
c2w = self.c2ws[idx]
|
238 |
+
w2c = np.linalg.inv(c2w)
|
239 |
+
w2c_ref = w2c
|
240 |
+
w2c_ref_inv = np.linalg.inv(w2c_ref)
|
241 |
+
|
242 |
+
w2cs.append(w2c @ w2c_ref_inv)
|
243 |
+
c2ws.append(np.linalg.inv(w2c @ w2c_ref_inv))
|
244 |
+
|
245 |
+
img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}.png')
|
246 |
+
|
247 |
+
depth_filename = os.path.join(os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{idx}_depth_mm.png'))
|
248 |
+
|
249 |
+
|
250 |
+
img = Image.open(img_filename)
|
251 |
+
|
252 |
+
img = self.transform(img) # (4, h, w)
|
253 |
+
|
254 |
+
|
255 |
+
if img.shape[0] == 4:
|
256 |
+
img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
|
257 |
+
imgs += [img]
|
258 |
+
|
259 |
+
depth_h = cv2.imread(depth_filename, cv2.IMREAD_UNCHANGED).astype(np.uint16) / 1000.0
|
260 |
+
mask_h = depth_h > 0
|
261 |
+
|
262 |
+
directions = get_ray_directions(self.img_wh[1], self.img_wh[0], [self.intrinsic[0, 0], self.intrinsic[1, 1]]) # [H, W, 3]
|
263 |
+
surface_points = directions * depth_h[..., None] # [H, W, 3]
|
264 |
+
distance = np.linalg.norm(surface_points, axis=-1) # [H, W]
|
265 |
+
depth_h = distance
|
266 |
+
|
267 |
+
|
268 |
+
depths_h.append(depth_h)
|
269 |
+
masks_h.append(mask_h)
|
270 |
+
|
271 |
+
intrinsic = self.intrinsic
|
272 |
+
intrinsics.append(intrinsic)
|
273 |
+
|
274 |
+
|
275 |
+
near_fars.append(self.near_fars[idx])
|
276 |
+
image_perm = 0 # only supervised on reference view
|
277 |
+
|
278 |
+
mask_dilated = None
|
279 |
+
|
280 |
+
# src_views = range(8+idx*4, 8+(idx+1)*4)
|
281 |
+
src_views = range(0, 8 * 4)
|
282 |
+
|
283 |
+
for vid in src_views:
|
284 |
+
img_filename = os.path.join('/objaverse-processed/zero12345_img/random32/', folder_id, uid, f'{vid}.png')
|
285 |
+
|
286 |
+
img = Image.open(img_filename)
|
287 |
+
img_wh = self.img_wh
|
288 |
+
|
289 |
+
img = self.transform(img)
|
290 |
+
if img.shape[0] == 4:
|
291 |
+
img = img[:3] * img[-1:] + (1 - img[-1:]) # blend A to RGB
|
292 |
+
|
293 |
+
imgs += [img]
|
294 |
+
depth_h = np.ones(img.shape[1:], dtype=np.float32)
|
295 |
+
depths_h.append(depth_h)
|
296 |
+
masks_h.append(np.ones(img.shape[1:], dtype=np.int32))
|
297 |
+
|
298 |
+
near_fars.append(self.all_near_fars[vid])
|
299 |
+
intrinsics.append(self.all_intrinsics[vid])
|
300 |
+
|
301 |
+
w2cs.append(self.all_extrinsics[vid] @ w2c_ref_inv)
|
302 |
+
|
303 |
+
|
304 |
+
# ! estimate scale_mat
|
305 |
+
scale_mat, scale_factor = self.cal_scale_mat(
|
306 |
+
img_hw=[img_wh[1], img_wh[0]],
|
307 |
+
intrinsics=intrinsics, extrinsics=w2cs,
|
308 |
+
near_fars=near_fars, factor=1.1
|
309 |
+
)
|
310 |
+
|
311 |
+
|
312 |
+
new_near_fars = []
|
313 |
+
new_w2cs = []
|
314 |
+
new_c2ws = []
|
315 |
+
new_affine_mats = []
|
316 |
+
new_depths_h = []
|
317 |
+
for intrinsic, extrinsic, near_far, depth in zip(intrinsics, w2cs, near_fars, depths_h):
|
318 |
+
|
319 |
+
P = intrinsic @ extrinsic @ scale_mat
|
320 |
+
P = P[:3, :4]
|
321 |
+
# - should use load_K_Rt_from_P() to obtain c2w
|
322 |
+
c2w = load_K_Rt_from_P(None, P)[1]
|
323 |
+
w2c = np.linalg.inv(c2w)
|
324 |
+
new_w2cs.append(w2c)
|
325 |
+
new_c2ws.append(c2w)
|
326 |
+
affine_mat = np.eye(4)
|
327 |
+
affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
|
328 |
+
new_affine_mats.append(affine_mat)
|
329 |
+
|
330 |
+
camera_o = c2w[:3, 3]
|
331 |
+
dist = np.sqrt(np.sum(camera_o ** 2))
|
332 |
+
near = dist - 1
|
333 |
+
far = dist + 1
|
334 |
+
|
335 |
+
new_near_fars.append([0.95 * near, 1.05 * far])
|
336 |
+
new_depths_h.append(depth * scale_factor)
|
337 |
+
|
338 |
+
# print(new_near_fars)
|
339 |
+
imgs = torch.stack(imgs).float()
|
340 |
+
depths_h = np.stack(new_depths_h)
|
341 |
+
masks_h = np.stack(masks_h)
|
342 |
+
|
343 |
+
affine_mats = np.stack(new_affine_mats)
|
344 |
+
intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
|
345 |
+
new_near_fars)
|
346 |
+
|
347 |
+
if self.split == 'train':
|
348 |
+
start_idx = 0
|
349 |
+
else:
|
350 |
+
start_idx = 1
|
351 |
+
|
352 |
+
view_ids = [idx] + list(src_views)
|
353 |
+
sample['origin_idx'] = origin_idx
|
354 |
+
sample['images'] = imgs # (V, 3, H, W)
|
355 |
+
sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32)) # (V, H, W)
|
356 |
+
sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32)) # (V, H, W)
|
357 |
+
sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32)) # (V, 4, 4)
|
358 |
+
sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32)) # (V, 4, 4)
|
359 |
+
sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32)) # (V, 2)
|
360 |
+
sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3] # (V, 3, 3)
|
361 |
+
sample['view_ids'] = torch.from_numpy(np.array(view_ids))
|
362 |
+
sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32)) # ! in world space
|
363 |
+
|
364 |
+
# sample['light_idx'] = torch.tensor(light_idx)
|
365 |
+
sample['scan'] = folder_id
|
366 |
+
|
367 |
+
sample['scale_factor'] = torch.tensor(scale_factor)
|
368 |
+
sample['img_wh'] = torch.from_numpy(np.array(img_wh))
|
369 |
+
sample['render_img_idx'] = torch.tensor(image_perm)
|
370 |
+
sample['partial_vol_origin'] = self.partial_vol_origin
|
371 |
+
sample['meta'] = str(folder_id) + "_" + str(uid) + "_refview" + str(view_ids[0])
|
372 |
+
|
373 |
+
|
374 |
+
# - image to render
|
375 |
+
sample['query_image'] = sample['images'][0]
|
376 |
+
sample['query_c2w'] = sample['c2ws'][0]
|
377 |
+
sample['query_w2c'] = sample['w2cs'][0]
|
378 |
+
sample['query_intrinsic'] = sample['intrinsics'][0]
|
379 |
+
sample['query_depth'] = sample['depths_h'][0]
|
380 |
+
sample['query_mask'] = sample['masks_h'][0]
|
381 |
+
sample['query_near_far'] = sample['near_fars'][0]
|
382 |
+
|
383 |
+
sample['images'] = sample['images'][start_idx:] # (V, 3, H, W)
|
384 |
+
sample['depths_h'] = sample['depths_h'][start_idx:] # (V, H, W)
|
385 |
+
sample['masks_h'] = sample['masks_h'][start_idx:] # (V, H, W)
|
386 |
+
sample['w2cs'] = sample['w2cs'][start_idx:] # (V, 4, 4)
|
387 |
+
sample['c2ws'] = sample['c2ws'][start_idx:] # (V, 4, 4)
|
388 |
+
sample['intrinsics'] = sample['intrinsics'][start_idx:] # (V, 3, 3)
|
389 |
+
sample['view_ids'] = sample['view_ids'][start_idx:]
|
390 |
+
sample['affine_mats'] = sample['affine_mats'][start_idx:] # ! in world space
|
391 |
+
|
392 |
+
sample['scale_mat'] = torch.from_numpy(scale_mat)
|
393 |
+
sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)
|
394 |
+
|
395 |
+
# - generate rays
|
396 |
+
if ('val' in self.split) or ('test' in self.split):
|
397 |
+
sample_rays = gen_rays_from_single_image(
|
398 |
+
img_wh[1], img_wh[0],
|
399 |
+
sample['query_image'],
|
400 |
+
sample['query_intrinsic'],
|
401 |
+
sample['query_c2w'],
|
402 |
+
depth=sample['query_depth'],
|
403 |
+
mask=sample['query_mask'] if self.clean_image else None)
|
404 |
+
else:
|
405 |
+
sample_rays = gen_random_rays_from_single_image(
|
406 |
+
img_wh[1], img_wh[0],
|
407 |
+
self.N_rays,
|
408 |
+
sample['query_image'],
|
409 |
+
sample['query_intrinsic'],
|
410 |
+
sample['query_c2w'],
|
411 |
+
depth=sample['query_depth'],
|
412 |
+
mask=sample['query_mask'] if self.clean_image else None,
|
413 |
+
dilated_mask=mask_dilated,
|
414 |
+
importance_sample=self.importance_sample)
|
415 |
+
|
416 |
+
|
417 |
+
sample['rays'] = sample_rays
|
418 |
+
|
419 |
+
return sample
|
SparseNeuS_demo_v1/data/dtu/dtu_pairs.txt
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
46
|
2 |
+
0
|
3 |
+
10 10 2346.410000 1 2036.530000 9 1243.890000 12 1052.870000 11 1000.840000 13 703.583000 2 604.456000 8 439.759000 14 327.419000 27 249.278000
|
4 |
+
1
|
5 |
+
10 9 2850.870000 10 2583.940000 2 2105.590000 0 2052.840000 8 1868.240000 13 1184.230000 14 1017.510000 12 961.966000 7 670.208000 15 657.218000
|
6 |
+
2
|
7 |
+
10 8 2501.240000 1 2106.880000 7 1856.500000 9 1782.340000 3 1141.770000 15 1061.760000 14 815.457000 16 762.153000 6 709.789000 10 699.921000
|
8 |
+
3
|
9 |
+
10 7 1294.390000 6 1159.130000 2 1134.270000 4 905.717000 8 687.320000 5 600.015000 17 496.958000 16 481.969000 1 379.011000 15 307.450000
|
10 |
+
4
|
11 |
+
10 5 1333.740000 6 1145.150000 3 895.254000 7 486.504000 18 446.420000 2 418.517000 17 326.528000 8 161.115000 16 149.154000 1 103.626000
|
12 |
+
5
|
13 |
+
10 6 1676.060000 18 1555.060000 4 1335.550000 17 868.416000 3 593.755000 7 467.816000 20 440.579000 19 428.255000 16 242.327000 21 210.253000
|
14 |
+
6
|
15 |
+
10 17 2332.350000 7 1848.240000 18 1812.740000 5 1696.070000 16 1273.000000 3 1157.990000 4 1155.410000 20 771.624000 21 744.945000 2 700.368000
|
16 |
+
7
|
17 |
+
10 16 2709.460000 8 2439.700000 15 2078.210000 6 1864.160000 2 1846.600000 17 1791.710000 3 1296.860000 22 957.793000 9 879.088000 21 782.277000
|
18 |
+
8
|
19 |
+
10 15 3124.010000 9 3099.920000 14 2756.290000 2 2501.220000 7 2449.320000 1 1875.940000 16 1726.040000 13 1325.760000 23 1177.090000 24 1108.820000
|
20 |
+
9
|
21 |
+
10 13 3355.620000 14 3226.070000 8 3098.800000 10 3097.070000 1 2861.420000 12 1873.630000 2 1785.980000 15 1753.320000 25 1365.450000 0 1261.590000
|
22 |
+
10
|
23 |
+
10 12 3750.700000 9 3085.870000 13 3028.390000 1 2590.550000 0 2369.790000 11 2266.670000 14 1524.160000 26 1448.150000 27 1293.600000 8 1041.840000
|
24 |
+
11
|
25 |
+
10 12 3543.760000 27 3056.050000 10 2248.070000 26 1524.280000 28 1273.330000 13 1265.900000 29 1129.550000 0 998.164000 9 591.176000 30 572.919000
|
26 |
+
12
|
27 |
+
10 27 3889.870000 10 3754.540000 13 3745.210000 11 3584.260000 26 3574.560000 25 1877.110000 9 1866.340000 29 1482.720000 30 1418.510000 14 1341.860000
|
28 |
+
13
|
29 |
+
10 12 3773.140000 26 3699.280000 25 3657.170000 14 3652.040000 9 3356.290000 10 3049.270000 24 2098.910000 27 1900.960000 31 1460.960000 30 1349.620000
|
30 |
+
14
|
31 |
+
10 13 3663.520000 24 3610.690000 9 3232.550000 25 3216.400000 15 3128.840000 8 2758.040000 23 2219.910000 26 1567.450000 10 1536.600000 32 1419.330000
|
32 |
+
15
|
33 |
+
10 23 3194.920000 14 3126.000000 8 3120.430000 16 2897.020000 24 2562.490000 7 2084.050000 22 2041.630000 9 1752.080000 33 1232.290000 13 1137.550000
|
34 |
+
16
|
35 |
+
10 15 2884.140000 7 2713.880000 22 2708.570000 17 2448.500000 21 2173.300000 23 1908.030000 8 1718.790000 6 1281.960000 35 1047.380000 34 980.064000
|
36 |
+
17
|
37 |
+
10 21 2632.480000 16 2428.000000 6 2343.570000 18 2250.230000 20 2149.750000 7 1779.420000 22 1380.250000 36 957.046000 5 878.398000 15 789.068000
|
38 |
+
18
|
39 |
+
9 17 2219.150000 20 2173.020000 6 1802.390000 19 1575.770000 5 1564.810000 21 1160.130000 16 660.317000 7 589.484000 36 559.983000
|
40 |
+
19
|
41 |
+
7 20 1828.970000 18 1564.630000 17 685.249000 36 613.420000 21 572.770000 5 427.597000 6 368.651000
|
42 |
+
20
|
43 |
+
8 21 2569.790000 36 2258.330000 18 2186.710000 17 2130.670000 19 1865.060000 35 996.122000 16 799.808000 40 778.721000
|
44 |
+
21
|
45 |
+
9 36 2704.590000 35 2639.690000 17 2638.190000 20 2605.430000 22 2604.260000 16 2158.250000 34 1239.250000 18 1178.240000 40 1128.570000
|
46 |
+
22
|
47 |
+
10 23 3232.680000 34 3175.150000 35 2831.090000 16 2712.510000 21 2632.190000 15 2033.390000 33 1712.670000 17 1393.860000 36 1290.960000 24 1195.330000
|
48 |
+
23
|
49 |
+
10 24 3710.900000 33 3603.070000 22 3244.200000 15 3190.620000 34 3086.490000 14 2220.110000 32 2100.000000 16 1917.100000 35 1359.790000 25 1356.710000
|
50 |
+
24
|
51 |
+
10 25 3844.600000 32 3750.750000 23 3710.600000 14 3609.090000 33 3091.040000 15 2559.240000 31 2423.710000 13 2109.360000 26 1440.580000 34 1410.030000
|
52 |
+
25
|
53 |
+
10 26 3951.740000 31 3888.570000 24 3833.070000 13 3667.350000 14 3208.210000 32 2993.460000 30 2681.520000 12 1900.230000 45 1484.030000 27 1462.880000
|
54 |
+
26
|
55 |
+
10 30 4033.350000 27 3970.470000 25 3925.250000 13 3686.340000 12 3595.590000 29 2943.870000 31 2917.000000 14 1556.340000 11 1554.750000 46 1503.840000
|
56 |
+
27
|
57 |
+
10 29 4027.840000 26 3929.940000 12 3875.580000 11 3085.030000 28 2908.600000 30 2792.670000 13 1878.420000 25 1438.550000 47 1425.200000 10 1290.250000
|
58 |
+
28
|
59 |
+
10 29 3687.020000 48 3209.130000 27 2872.860000 47 2014.530000 30 1361.950000 11 1273.600000 26 1062.850000 12 840.841000 46 672.985000 31 271.952000
|
60 |
+
29
|
61 |
+
10 27 4029.430000 30 3909.550000 28 3739.930000 47 3695.230000 48 3135.870000 26 2910.970000 46 2229.550000 12 1479.160000 31 1430.260000 11 1144.560000
|
62 |
+
30
|
63 |
+
10 26 4029.860000 29 3953.720000 31 3811.120000 46 3630.460000 47 3105.960000 27 2824.430000 25 2657.890000 45 2347.750000 32 1459.110000 12 1429.620000
|
64 |
+
31
|
65 |
+
10 25 3882.210000 30 3841.880000 32 3808.500000 45 3649.820000 46 3000.670000 26 2939.940000 24 2409.930000 44 2381.300000 13 1467.590000 29 1459.560000
|
66 |
+
32
|
67 |
+
10 31 3826.500000 24 3744.140000 33 3613.240000 44 3552.040000 25 3004.600000 45 2884.590000 43 2393.340000 23 2095.270000 30 1478.600000 14 1420.780000
|
68 |
+
33
|
69 |
+
10 32 3618.110000 23 3598.100000 34 3530.530000 43 3462.370000 24 3091.530000 44 2608.080000 42 2426.000000 22 1717.940000 31 1407.650000 25 1324.780000
|
70 |
+
34
|
71 |
+
10 33 3523.370000 42 3356.550000 35 3210.340000 22 3178.850000 23 3079.030000 43 2396.450000 41 2386.860000 24 1408.020000 32 1301.340000 21 1256.450000
|
72 |
+
35
|
73 |
+
10 34 3187.880000 41 3106.440000 36 2866.040000 22 2817.740000 21 2654.870000 40 2416.980000 42 2137.810000 23 1346.860000 33 1150.330000 16 1044.660000
|
74 |
+
36
|
75 |
+
8 40 2910.700000 35 2832.660000 21 2689.960000 20 2280.460000 41 1787.970000 22 1268.490000 34 981.636000 17 954.229000
|
76 |
+
40
|
77 |
+
7 36 2918.140000 41 2852.620000 35 2392.960000 21 1124.300000 42 1056.480000 34 877.946000 20 788.701000
|
78 |
+
41
|
79 |
+
9 35 3111.050000 42 3049.710000 40 2885.360000 34 2371.020000 36 1813.690000 43 1164.710000 22 1126.900000 21 906.536000 33 903.238000
|
80 |
+
42
|
81 |
+
10 34 3356.980000 43 3183.000000 41 3070.540000 33 2421.770000 35 2155.080000 44 1278.410000 23 1183.520000 22 1147.070000 40 1077.080000 32 899.646000
|
82 |
+
43
|
83 |
+
10 33 3461.240000 44 3380.740000 42 3188.700000 34 2400.600000 32 2399.090000 45 1359.370000 23 1314.080000 41 1176.120000 24 1159.620000 31 901.556000
|
84 |
+
44
|
85 |
+
10 32 3550.810000 45 3510.160000 43 3373.110000 33 2602.330000 31 2395.930000 24 1410.430000 46 1386.310000 42 1279.000000 25 1095.240000 34 968.440000
|
86 |
+
45
|
87 |
+
10 31 3650.090000 46 3555.090000 44 3491.150000 32 2868.390000 30 2373.590000 25 1485.370000 47 1405.280000 43 1349.540000 33 1104.770000 26 1046.810000
|
88 |
+
46
|
89 |
+
10 30 3635.640000 47 3562.170000 45 3524.170000 31 2976.820000 29 2264.040000 26 1508.870000 44 1367.410000 48 1352.100000 32 1211.240000 25 1102.170000
|
90 |
+
47
|
91 |
+
10 29 3705.310000 46 3519.760000 48 3450.480000 30 3074.770000 28 2054.630000 27 1434.570000 45 1377.340000 31 1268.230000 26 1223.830000 25 471.111000
|
92 |
+
48
|
93 |
+
10 47 3401.950000 28 3224.840000 29 3101.160000 46 1317.100000 30 1306.700000 27 1235.070000 26 537.731000 31 291.919000 45 276.869000 11 258.856000
|
SparseNeuS_demo_v1/data/dtu/lists/test.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
scan24
|
2 |
+
scan37
|
3 |
+
scan40
|
4 |
+
scan55
|
5 |
+
scan63
|
6 |
+
scan65
|
7 |
+
scan69
|
8 |
+
scan83
|
9 |
+
scan97
|
10 |
+
scan105
|
11 |
+
scan106
|
12 |
+
scan110
|
13 |
+
scan114
|
14 |
+
scan118
|
15 |
+
scan122
|
SparseNeuS_demo_v1/data/dtu/lists/train.txt
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
scan1
|
2 |
+
scan4
|
3 |
+
scan5
|
4 |
+
scan6
|
5 |
+
scan8
|
6 |
+
scan9
|
7 |
+
scan10
|
8 |
+
scan11
|
9 |
+
scan12
|
10 |
+
scan13
|
11 |
+
scan14
|
12 |
+
scan15
|
13 |
+
scan16
|
14 |
+
scan17
|
15 |
+
scan18
|
16 |
+
scan19
|
17 |
+
scan20
|
18 |
+
scan21
|
19 |
+
scan22
|
20 |
+
scan23
|
21 |
+
scan28
|
22 |
+
scan29
|
23 |
+
scan30
|
24 |
+
scan31
|
25 |
+
scan32
|
26 |
+
scan33
|
27 |
+
scan34
|
28 |
+
scan35
|
29 |
+
scan36
|
30 |
+
scan38
|
31 |
+
scan39
|
32 |
+
scan41
|
33 |
+
scan42
|
34 |
+
scan43
|
35 |
+
scan44
|
36 |
+
scan45
|
37 |
+
scan46
|
38 |
+
scan47
|
39 |
+
scan48
|
40 |
+
scan49
|
41 |
+
scan50
|
42 |
+
scan51
|
43 |
+
scan52
|
44 |
+
scan59
|
45 |
+
scan60
|
46 |
+
scan61
|
47 |
+
scan62
|
48 |
+
scan64
|
49 |
+
scan74
|
50 |
+
scan75
|
51 |
+
scan76
|
52 |
+
scan77
|
53 |
+
scan84
|
54 |
+
scan85
|
55 |
+
scan86
|
56 |
+
scan87
|
57 |
+
scan88
|
58 |
+
scan89
|
59 |
+
scan90
|
60 |
+
scan91
|
61 |
+
scan92
|
62 |
+
scan93
|
63 |
+
scan94
|
64 |
+
scan95
|
65 |
+
scan96
|
66 |
+
scan98
|
67 |
+
scan99
|
68 |
+
scan100
|
69 |
+
scan101
|
70 |
+
scan102
|
71 |
+
scan103
|
72 |
+
scan104
|
73 |
+
scan126
|
74 |
+
scan127
|
75 |
+
scan128
|
SparseNeuS_demo_v1/data/dtu_fit.py
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import cv2 as cv
|
4 |
+
import numpy as np
|
5 |
+
import re
|
6 |
+
import os
|
7 |
+
import logging
|
8 |
+
from glob import glob
|
9 |
+
|
10 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
11 |
+
|
12 |
+
from data.scene import get_boundingbox
|
13 |
+
|
14 |
+
|
15 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    :param filename: path of a text file holding the projection matrix, one
        row per line with space-separated values; only read when ``P`` is
        None. A 4-line file has its first line dropped as a header.
    :param P: optional 3x4 projection matrix; when given, ``filename`` is
        ignored (callers in this file pass ``None`` for it).
    :return: ``(intrinsics, pose)`` where ``intrinsics`` is a 4x4 matrix whose
        upper-left 3x3 block is K normalised so that ``K[2, 2] == 1``, and
        ``pose`` is a 4x4 float32 camera-to-world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # (the previous bare open().read() leaked it until garbage collection).
        with open(filename) as f:
            rows = f.read().splitlines()
        if len(rows) == 4:
            rows = rows[1:]
        rows = [[tok[0], tok[1], tok[2], tok[3]] for tok in (row.split(" ") for row in rows)]
        P = np.asarray(rows).astype(np.float32).squeeze()

    # Only the first three outputs are used: calibration matrix, rotation
    # matrix and a 4x1 homogeneous vector.
    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    # Normalise the calibration matrix so its bottom-right entry is 1.
    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is transposed (i.e. inverted, for a rotation) so that the pose maps
    # camera coordinates to world coordinates.
    pose[:3, :3] = R.transpose()
    # De-homogenise the 4x1 vector to obtain the pose translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
37 |
+
|
38 |
+
|
39 |
+
class DtuFit:
    """Single-scene DTU dataset used for per-scene fitting.

    All images and cameras of one scan are loaded eagerly in ``__init__``.
    Cameras are re-expressed relative to the reference camera (the first
    training view) and then rescaled with ``scale_mat`` so that the region of
    interest is meant to fit the ``[-1, 1]^3`` box stored in
    ``bbox_min`` / ``bbox_max``.
    """

    def __init__(self, root_dir, split, scan_id, n_views, train_img_idx=None, test_img_idx=None,
                 img_wh=None, clip_wh=None, original_img_wh=None,
                 N_rays=512, h_patch_size=5, near=425, far=900):
        """Load the scan and precompute the scaled camera parameters.

        :param root_dir: dataset root directory.
        :param split: ``'train'``, ``'val'`` or ``'test'``; selects how items
            and rays are generated.
        :param scan_id: sub-directory of ``root_dir`` holding the scan, or
            None to read directly from ``root_dir``.
        :param n_views: number of training views cycled through per epoch.
        :param train_img_idx: indices of the training images; must be
            non-empty because its first entry is the reference view.
            Defaults to an empty list.
        :param test_img_idx: indices of the test images. Defaults to an empty
            list.
        :param img_wh: ``[width, height]`` the images are resized to.
            Defaults to ``[800, 600]``.
        :param clip_wh: pixels cropped from the image borders, either
            ``[w, h]`` (applied on both sides) or
            ``[left, top, right, bottom]``. Defaults to ``[0, 0]``.
        :param original_img_wh: ``[width, height]`` of the images on disk.
            Defaults to ``[1600, 1200]``.
        :param N_rays: number of random rays sampled per training item.
        :param h_patch_size: half patch size forwarded to the random ray
            sampler.
        :param near: raw near bound assigned to every image before scaling.
        :param far: raw far bound assigned to every image before scaling.
        """
        super().__init__()
        logging.info('Load data: Begin')

        # None sentinels replace the former mutable list defaults, which were
        # stored on the instance and therefore shared between instances.
        train_img_idx = [] if train_img_idx is None else train_img_idx
        test_img_idx = [] if test_img_idx is None else test_img_idx
        img_wh = [800, 600] if img_wh is None else img_wh
        clip_wh = [0, 0] if clip_wh is None else clip_wh
        original_img_wh = [1600, 1200] if original_img_wh is None else original_img_wh

        self.root_dir = root_dir
        self.split = split
        self.scan_id = scan_id
        self.n_views = n_views

        self.near = near
        self.far = far

        if self.scan_id is not None:
            self.data_dir = os.path.join(self.root_dir, self.scan_id)
        else:
            self.data_dir = self.root_dir

        self.img_wh = img_wh
        self.clip_wh = clip_wh

        # A two-element clip is duplicated into a four-element
        # [left, top, right, bottom] clip.
        if len(self.clip_wh) == 2:
            self.clip_wh = self.clip_wh + self.clip_wh

        self.original_img_wh = original_img_wh
        self.N_rays = N_rays
        self.h_patch_size = h_patch_size  # used to extract patch for supervision
        self.train_img_idx = train_img_idx
        self.test_img_idx = test_img_idx

        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; only point this at trusted camera files.
        camera_dict = np.load(os.path.join(self.data_dir, 'cameras.npz'), allow_pickle=True)
        self.images_list = sorted(glob(os.path.join(self.data_dir, "image/*.png")))
        # world_mat: projection matrix: world to image
        self.world_mats_np = [camera_dict['world_mat_%d' % idx].astype(np.float32) for idx in
                              range(len(self.images_list))]

        # Every image starts with the same raw [near, far] pair.
        self.raw_near_fars = np.stack([np.array([self.near, self.far]) for _ in range(len(self.images_list))])

        # - reference image; transform the world system to the ref-camera system
        self.ref_img_idx = self.train_img_idx[0]
        ref_world_mat = self.world_mats_np[self.ref_img_idx]
        self.ref_w2c = np.linalg.inv(load_K_Rt_from_P(None, ref_world_mat[:3, :4])[1])

        self.all_images = []
        self.all_intrinsics = []
        self.all_w2cs = []

        self.load_scene()  # load the scene

        # ! estimate scale_mat
        self.scale_mat, self.scale_factor = self.cal_scale_mat(
            img_hw=[self.img_wh[1], self.img_wh[0]],
            intrinsics=self.all_intrinsics[self.train_img_idx],
            extrinsics=self.all_w2cs[self.train_img_idx],
            near_fars=self.raw_near_fars[self.train_img_idx],
            factor=1.1)

        # * after scaling and translation, unit bounding box
        (self.scaled_intrinsics, self.scaled_w2cs, self.scaled_c2ws,
         self.scaled_affine_mats, self.scaled_near_fars) = self.scale_cam_info()
        self.bbox_min = np.array([-1.0, -1.0, -1.0])
        self.bbox_max = np.array([1.0, 1.0, 1.0])
        self.partial_vol_origin = torch.Tensor([-1., -1., -1.])

        logging.info('Load data: End')

    def load_scene(self):
        """Read every image and camera of the scan into tensors.

        Images are resized to ``img_wh``, cropped by ``clip_wh``, converted
        from BGR to RGB channel order and stored channel-first. Intrinsics
        are rescaled and shifted to match the resize and crop, and every
        world-to-camera matrix is composed with the inverse of ``ref_w2c``.
        Afterwards ``img_wh`` is replaced by the cropped size.
        """
        scale_x = self.img_wh[0] / self.original_img_wh[0]
        scale_y = self.img_wh[1] / self.original_img_wh[1]

        # Loop-invariant: the reference camera-to-world matrix.
        ref_c2w = np.linalg.inv(self.ref_w2c)

        for img_path, world_mat in zip(self.images_list, self.world_mats_np):
            image = cv.imread(img_path)
            image = cv.resize(image, (self.img_wh[0], self.img_wh[1])) / 255.

            image = image[self.clip_wh[1]:self.img_wh[1] - self.clip_wh[3],
                          self.clip_wh[0]:self.img_wh[0] - self.clip_wh[2]]
            # Reverse the channel axis (BGR -> RGB) and move it to the front.
            self.all_images.append(np.transpose(image[:, :, ::-1], (2, 0, 1)))  # append [3,]

            P = world_mat[:3, :4]
            intrinsics, c2w = load_K_Rt_from_P(None, P)
            w2c = np.linalg.inv(c2w)

            # Account for the resize ...
            intrinsics[:1] *= scale_x
            intrinsics[1:2] *= scale_y

            # ... and for the crop offset of the principal point.
            intrinsics[0, 2] -= self.clip_wh[0]
            intrinsics[1, 2] -= self.clip_wh[1]

            self.all_intrinsics.append(intrinsics)
            # - transform from world system to ref-camera system
            self.all_w2cs.append(w2c @ ref_c2w)

        self.all_images = torch.from_numpy(np.stack(self.all_images)).to(torch.float32)
        self.all_intrinsics = torch.from_numpy(np.stack(self.all_intrinsics)).to(torch.float32)
        self.all_w2cs = torch.from_numpy(np.stack(self.all_w2cs)).to(torch.float32)
        self.img_wh = [self.img_wh[0] - self.clip_wh[0] - self.clip_wh[2],
                       self.img_wh[1] - self.clip_wh[1] - self.clip_wh[3]]

    def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
        """Build the scale-and-translate matrix from the scene bounding box.

        :param img_hw: ``[height, width]`` of the images.
        :param intrinsics: camera intrinsics forwarded to ``get_boundingbox``.
        :param extrinsics: world-to-camera matrices forwarded to
            ``get_boundingbox``.
        :param near_fars: per-view near/far bounds forwarded to
            ``get_boundingbox``.
        :param factor: multiplier applied to the returned radius.
        :return: ``(scale_mat, scale_factor)``; ``scale_mat`` is a float32 4x4
            matrix with the enlarged radius on the diagonal and the centre in
            the last column, ``scale_factor`` is ``1 / radius``.
        """
        # get_boundingbox lives in data.scene; center and radius are used as
        # torch tensors below (both get .cpu() called on them).
        center, radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
        radius = radius * factor
        scale_mat = np.diag([radius, radius, radius, 1.0])
        scale_mat[:3, 3] = center.cpu().numpy()
        scale_mat = scale_mat.astype(np.float32)

        return scale_mat, 1. / radius.cpu().numpy()

    def scale_cam_info(self):
        """Re-derive every camera after applying ``scale_mat``.

        :return: ``(intrinsics, w2cs, c2ws, affine_mats, near_fars)`` as
            float32 tensors stacked over all images. ``near_fars`` is
            ``[0.95 * (d - 1), 1.05 * (d + 1)]`` with ``d`` the distance of
            the scaled camera centre from the origin.
        """
        new_intrinsics = []
        new_near_fars = []
        new_w2cs = []
        new_c2ws = []
        new_affine_mats = []
        for intrinsics, w2c_ref in zip(self.all_intrinsics, self.all_w2cs):
            P = intrinsics @ w2c_ref @ self.scale_mat
            P = P.cpu().numpy()[:3, :4]

            # - should use load_K_Rt_from_P() to obtain c2w
            c2w = load_K_Rt_from_P(None, P)[1]
            w2c = np.linalg.inv(c2w)
            new_w2cs.append(w2c)
            new_c2ws.append(c2w)
            new_intrinsics.append(intrinsics)
            affine_mat = np.eye(4)
            affine_mat[:3, :4] = intrinsics[:3, :3] @ w2c[:3, :4]
            new_affine_mats.append(affine_mat)

            # The near/far range is the camera distance -/+ 1, padded by 5%.
            camera_o = c2w[:3, 3]
            dist = np.sqrt(np.sum(camera_o ** 2))
            near = dist - 1
            far = dist + 1

            new_near_fars.append([0.95 * near, 1.05 * far])

        new_intrinsics = torch.from_numpy(np.float32(np.stack(new_intrinsics)))
        new_w2cs = torch.from_numpy(np.float32(np.stack(new_w2cs)))
        new_c2ws = torch.from_numpy(np.float32(np.stack(new_c2ws)))
        new_affine_mats = torch.from_numpy(np.float32(np.stack(new_affine_mats)))
        new_near_fars = torch.from_numpy(np.float32(np.stack(new_near_fars)))

        return new_intrinsics, new_w2cs, new_c2ws, new_affine_mats, new_near_fars

    def get_conditional_sample(self):
        """Return the scaled data of all training views as one sample dict.

        :return: dict with the training images, cameras, near/far bounds and
            the scene-level scale/transform entries.
        """
        sample = {}
        support_idxs = self.train_img_idx

        sample['images'] = self.all_images[support_idxs]  # (V, 3, H, W)
        sample['w2cs'] = self.scaled_w2cs[support_idxs]  # (V, 4, 4)
        sample['c2ws'] = self.scaled_c2ws[support_idxs]  # (V, 4, 4)
        sample['near_fars'] = self.scaled_near_fars[support_idxs]  # (V, 2)
        sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3]  # (V, 3, 3)
        sample['affine_mats'] = self.scaled_affine_mats[support_idxs]  # ! in world space

        sample['scan'] = self.scan_id
        sample['scale_factor'] = torch.tensor(self.scale_factor)
        sample['scale_mat'] = torch.from_numpy(self.scale_mat)
        sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
        sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
        # clone().detach() copies the tensor without the UserWarning that
        # torch.tensor(<tensor>) raises on every call.
        sample['partial_vol_origin'] = self.partial_vol_origin.clone().detach().to(torch.float32)

        return sample

    def __len__(self):
        """Return 1000 virtual items per training view or per test image."""
        if self.split == 'train':
            return self.n_views * 1000
        return len(self.test_img_idx) * 1000

    def __getitem__(self, idx):
        """Build the sample for item ``idx``.

        For the train split the rendered view cycles through the first
        ``n_views`` training indices and the remaining training views are the
        support set; otherwise the rendered test view is its own support set.

        :param idx: item index; wrapped modulo the number of views.
        :return: dict with support views, the query view and its rays.
        """
        sample = {}

        if self.split == 'train':
            render_idx = self.train_img_idx[idx % self.n_views]
            support_idxs = [view for view in self.train_img_idx if view != render_idx]
        else:
            render_idx = self.test_img_idx[idx % len(self.test_img_idx)]
            support_idxs = [render_idx]

        sample['images'] = self.all_images[support_idxs]  # (V, 3, H, W)
        sample['w2cs'] = self.scaled_w2cs[support_idxs]  # (V, 4, 4)
        sample['c2ws'] = self.scaled_c2ws[support_idxs]  # (V, 4, 4)
        sample['intrinsics'] = self.scaled_intrinsics[support_idxs][:, :3, :3]  # (V, 3, 3)
        sample['affine_mats'] = self.scaled_affine_mats[support_idxs]  # ! in world space
        sample['scan'] = self.scan_id
        sample['scale_factor'] = torch.tensor(self.scale_factor)
        sample['img_wh'] = torch.from_numpy(np.array(self.img_wh))
        # clone().detach() copies the tensor without the UserWarning that
        # torch.tensor(<tensor>) raises on every call.
        sample['partial_vol_origin'] = self.partial_vol_origin.clone().detach().to(torch.float32)
        sample['img_index'] = torch.tensor(render_idx)

        # - query image
        sample['query_image'] = self.all_images[render_idx]
        sample['query_c2w'] = self.scaled_c2ws[render_idx]
        sample['query_w2c'] = self.scaled_w2cs[render_idx]
        sample['query_intrinsic'] = self.scaled_intrinsics[render_idx]
        sample['query_near_far'] = self.scaled_near_fars[render_idx]
        sample['meta'] = str(self.scan_id) + "_" + os.path.basename(self.images_list[render_idx])
        sample['scale_mat'] = torch.from_numpy(self.scale_mat)
        sample['trans_mat'] = torch.from_numpy(np.linalg.inv(self.ref_w2c))
        sample['rendering_c2ws'] = self.scaled_c2ws[self.test_img_idx]
        sample['rendering_imgs_idx'] = torch.Tensor(np.array(self.test_img_idx).astype(np.int32))

        # - generate rays
        if self.split in ('val', 'test'):
            # Evaluation: rays for the full query image.
            sample_rays = gen_rays_from_single_image(
                self.img_wh[1], self.img_wh[0],
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=None,
                mask=None)
        else:
            # Training: N_rays randomly sampled rays.
            sample_rays = gen_random_rays_from_single_image(
                self.img_wh[1], self.img_wh[0],
                self.N_rays,
                sample['query_image'],
                sample['query_intrinsic'],
                sample['query_c2w'],
                depth=None,
                mask=None,
                dilated_mask=None,
                importance_sample=False,
                h_patch_size=self.h_patch_size
            )

        sample['rays'] = sample_rays

        return sample
|
SparseNeuS_demo_v1/data/dtu_general.py
ADDED
@@ -0,0 +1,376 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch.utils.data import Dataset
|
2 |
+
from utils.misc_utils import read_pfm
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
from PIL import Image
|
7 |
+
import torch
|
8 |
+
from torchvision import transforms as T
|
9 |
+
from data.scene import get_boundingbox
|
10 |
+
|
11 |
+
from models.rays import gen_rays_from_single_image, gen_random_rays_from_single_image
|
12 |
+
|
13 |
+
from termcolor import colored
|
14 |
+
import pdb
|
15 |
+
import random
|
16 |
+
|
17 |
+
|
18 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a 3x4 projection matrix into intrinsics and a camera pose.

    :param filename: path of a text file holding the projection matrix, one
        row per line with space-separated values; only read when ``P`` is
        None. A 4-line file has its first line dropped as a header.
    :param P: optional 3x4 projection matrix; when given, ``filename`` is
        ignored.
    :return: ``(intrinsics, pose)`` where ``intrinsics`` is a 4x4 matrix whose
        upper-left 3x3 block is K normalised so that ``K[2, 2] == 1``, and
        ``pose`` is a 4x4 float32 camera-to-world matrix.
    """
    if P is None:
        # Use a context manager so the file handle is closed deterministically
        # (the previous bare open().read() leaked it until garbage collection).
        with open(filename) as f:
            rows = f.read().splitlines()
        if len(rows) == 4:
            rows = rows[1:]
        rows = [[tok[0], tok[1], tok[2], tok[3]] for tok in (row.split(" ") for row in rows)]
        P = np.asarray(rows).astype(np.float32).squeeze()

    # Only the first three outputs are used: calibration matrix, rotation
    # matrix and a 4x1 homogeneous vector.
    out = cv2.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    # Normalise the calibration matrix so its bottom-right entry is 1.
    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R is transposed (i.e. inverted, for a rotation) so that the pose maps
    # camera coordinates to world coordinates.
    pose[:3, :3] = R.transpose()
    # De-homogenise the 4x1 vector to obtain the pose translation column.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
40 |
+
|
41 |
+
|
42 |
+
# ! load one ref-image with multiple src-images in camera coordinate system
|
43 |
+
class MVSDatasetDtuPerView(Dataset):
|
44 |
+
def __init__(self, root_dir, split, n_views=3, img_wh=(640, 512), downSample=1.0,
|
45 |
+
split_filepath=None, pair_filepath=None,
|
46 |
+
N_rays=512,
|
47 |
+
vol_dims=[128, 128, 128], batch_size=1,
|
48 |
+
clean_image=False, importance_sample=False, test_ref_views=[]):
|
49 |
+
|
50 |
+
self.root_dir = root_dir
|
51 |
+
self.split = split
|
52 |
+
|
53 |
+
self.img_wh = img_wh
|
54 |
+
self.downSample = downSample
|
55 |
+
self.num_all_imgs = 49 # this preprocessed DTU dataset has 49 images
|
56 |
+
self.n_views = n_views
|
57 |
+
self.N_rays = N_rays
|
58 |
+
self.batch_size = batch_size # - used for construct new metas for gru fusion training
|
59 |
+
|
60 |
+
self.clean_image = clean_image
|
61 |
+
self.importance_sample = importance_sample
|
62 |
+
self.test_ref_views = test_ref_views # used for testing
|
63 |
+
self.scale_factor = 1.0
|
64 |
+
self.scale_mat = np.float32(np.diag([1, 1, 1, 1.0]))
|
65 |
+
|
66 |
+
if img_wh is not None:
|
67 |
+
assert img_wh[0] % 32 == 0 and img_wh[1] % 32 == 0, \
|
68 |
+
'img_wh must both be multiples of 32!'
|
69 |
+
|
70 |
+
self.split_filepath = f'data/dtu/lists/{self.split}.txt' if split_filepath is None else split_filepath
|
71 |
+
self.pair_filepath = f'data/dtu/dtu_pairs.txt' if pair_filepath is None else pair_filepath
|
72 |
+
|
73 |
+
print(colored("loading all scenes together", 'red'))
|
74 |
+
with open(self.split_filepath) as f:
|
75 |
+
self.scans = [line.rstrip() for line in f.readlines()]
|
76 |
+
|
77 |
+
self.all_intrinsics = [] # the cam info of the whole scene
|
78 |
+
self.all_extrinsics = []
|
79 |
+
self.all_near_fars = []
|
80 |
+
|
81 |
+
self.metas, self.ref_src_pairs = self.build_metas() # load ref-srcs view pairs info of the scene
|
82 |
+
|
83 |
+
self.allview_ids = [i for i in range(self.num_all_imgs)]
|
84 |
+
|
85 |
+
self.load_cam_info() # load camera info of DTU, and estimate scale_mat
|
86 |
+
|
87 |
+
self.build_remap()
|
88 |
+
self.define_transforms()
|
89 |
+
print(f'==> image down scale: {self.downSample}')
|
90 |
+
|
91 |
+
# * bounding box for rendering
|
92 |
+
self.bbox_min = np.array([-1.0, -1.0, -1.0])
|
93 |
+
self.bbox_max = np.array([1.0, 1.0, 1.0])
|
94 |
+
|
95 |
+
# - used for cost volume regularization
|
96 |
+
self.voxel_dims = torch.tensor(vol_dims, dtype=torch.float32)
|
97 |
+
self.partial_vol_origin = torch.Tensor([-1., -1., -1.])
|
98 |
+
|
99 |
+
def build_remap(self):
    """Build ``self.remap``, a lookup table from view id to list position.

    ``self.remap[view_id]`` is the index of ``view_id`` inside
    ``self.allview_ids``. Entries for ids that never occur stay at 0.
    """
    view_ids = self.allview_ids
    table = np.zeros(np.max(view_ids) + 1).astype('int')
    for position, view_id in enumerate(view_ids):
        table[view_id] = position
    self.remap = table
|
103 |
+
|
104 |
+
def define_transforms(self):
    """Create ``self.transform``, the per-image preprocessing pipeline.

    The pipeline is a ``T.Compose`` holding a single ``T.ToTensor`` step
    (``T`` is presumably ``torchvision.transforms`` -- confirm against the
    file's imports).
    """
    steps = [T.ToTensor()]
    self.transform = T.Compose(steps)
|
106 |
+
|
107 |
+
def build_metas(self):
    """Enumerate every (scan, light, reference view, source views) sample.

    The pair file at ``self.pair_filepath`` starts with the number of
    viewpoints, followed by two lines per viewpoint: the reference view id
    and a line whose odd-indexed tokens are the source view ids.

    The file is parsed once; the original implementation re-opened and
    re-parsed it for every (light, scan) combination although its content
    does not depend on either.

    Returns:
        tuple: ``(metas, ref_src_pairs)`` where ``metas`` is a list of
        ``(scan, light_idx, ref_view, src_views)`` tuples ordered by light,
        then scan, then file order, and ``ref_src_pairs`` maps each
        reference view id to its list of source view ids.
    """
    # light conditions 0-6 for training
    # light condition 3 for testing (the brightest?)
    light_idxs = [3] if 'train' not in self.split else range(7)

    pairs = []  # (ref_view, src_views) in file order
    with open(self.pair_filepath) as f:
        num_viewpoint = int(f.readline())
        # viewpoints (49)
        for _ in range(num_viewpoint):
            ref_view = int(f.readline().rstrip())
            src_views = [int(x) for x in f.readline().rstrip().split()[1::2]]
            pairs.append((ref_view, src_views))

    ref_src_pairs = dict(pairs)

    metas = []
    for light_idx in light_idxs:
        for scan in self.scans:
            for ref_view, src_views in pairs:
                # ! only for validation
                if len(self.test_ref_views) > 0 and ref_view not in self.test_ref_views:
                    continue
                # Copy so each meta owns its list, as it did when the file
                # was re-parsed per scan.
                metas.append((scan, light_idx, ref_view, list(src_views)))

    return metas, ref_src_pairs
|
139 |
+
|
140 |
+
def read_cam_file(self, filename):
    """Parse one camera text file.

    Layout read by this method (0-based line numbers): lines 1-4 hold the
    4x4 extrinsic matrix, lines 7-9 the 3x3 intrinsic matrix, and line 11
    holds ``depth_min depth_interval``.

    Side effect: stores the parsed interval in ``self.depth_interval``.

    Returns:
        tuple: ``(intrinsics, extrinsics, [depth_min, depth_max])`` where
        ``intrinsics`` is the 3x3 matrix embedded in a 4x4 float32 identity
        and ``depth_max = depth_min + 192 * depth_interval``.
    """
    with open(filename) as f:
        rows = [row.rstrip() for row in f.readlines()]

    # extrinsics: line [1,5), 4x4 matrix
    extrinsics = np.fromstring(' '.join(rows[1:5]), dtype=np.float32, sep=' ')
    extrinsics = extrinsics.reshape((4, 4))

    # intrinsics: line [7-10), 3x3 matrix
    k3 = np.fromstring(' '.join(rows[7:10]), dtype=np.float32, sep=' ')
    k3 = k3.reshape((3, 3))

    # depth_min & depth_interval: line 11
    depth_fields = rows[11].split()
    depth_min = float(depth_fields[0])
    interval = float(depth_fields[1])
    depth_max = depth_min + interval * 192
    self.depth_interval = interval

    k4 = np.float32(np.diag([1, 1, 1, 1]))
    k4[:3, :3] = k3
    return k4, extrinsics, [depth_min, depth_max]
|
156 |
+
|
157 |
+
def load_cam_info(self):
    """Read the camera file of every view and cache its parameters.

    For each view index below ``self.num_all_imgs`` the file
    ``Cameras/train/<8-digit id>_cam.txt`` under ``self.root_dir`` is
    parsed, and the results are appended to ``self.all_intrinsics``,
    ``self.all_extrinsics`` and ``self.all_near_fars``.
    """
    for view_idx in range(self.num_all_imgs):
        cam_path = os.path.join(self.root_dir,
                                f'Cameras/train/{view_idx:08d}_cam.txt')
        cam_k, cam_rt, depth_range = self.read_cam_file(cam_path)
        # * the provided intrinsics is 4x downsampled, now keep the same scale with image
        cam_k[:2] *= 4
        self.all_intrinsics.append(cam_k)
        self.all_extrinsics.append(cam_rt)
        self.all_near_fars.append(depth_range)
|
166 |
+
|
167 |
+
def read_depth(self, filename):
    """Produce the depth maps for one view at two resolutions.

    NOTE(review): the PFM file is read, but its content is immediately
    replaced by an all-ones (1200, 1600) array, so the returned maps are
    constant. This looks like a leftover placeholder -- confirm before
    relying on the depth values.

    Returns:
        tuple: ``(depth, depth_h)`` where ``depth_h`` is the half-size map
        cropped to rows 44:556 and columns 80:720 and rescaled by
        ``self.downSample``, and ``depth`` is ``depth_h`` shrunk by a
        further factor of 4. All resizing uses nearest-neighbour sampling.
    """
    nearest = cv2.INTER_NEAREST

    # The file is still loaded (and may raise), but the result is discarded.
    _loaded = np.array(read_pfm(filename)[0], dtype=np.float32)  # (1200, 1600)
    full_res = np.ones((1200, 1600))

    half_res = cv2.resize(full_res, None, fx=0.5, fy=0.5, interpolation=nearest)  # (600, 800)
    cropped = half_res[44:556, 80:720]  # (512, 640)

    depth_h = cv2.resize(cropped, None, fx=self.downSample, fy=self.downSample,
                         interpolation=nearest)
    depth = cv2.resize(depth_h, None, fx=1.0 / 4, fy=1.0 / 4,
                       interpolation=nearest)

    return depth, depth_h
|
183 |
+
|
184 |
+
def read_mask(self, filename):
    """Load a mask image and binarise it at two resolutions.

    The file is read as a single-channel image, rescaled by
    ``self.downSample`` (nearest neighbour) and then shrunk by a further
    factor of 4. Every non-zero pixel becomes 1 in both outputs.

    Returns:
        tuple: ``(mask, mask_h)`` -- the quarter-size and full-size masks.
    """
    nearest = cv2.INTER_NEAREST
    scale = self.downSample

    mask_h = cv2.imread(filename, 0)
    mask_h = cv2.resize(mask_h, None, fx=scale, fy=scale, interpolation=nearest)
    mask = cv2.resize(mask_h, None, fx=0.25, fy=0.25, interpolation=nearest)

    # the masks stored in png are not binary
    for arr in (mask, mask_h):
        arr[arr > 0] = 1

    return mask, mask_h
|
195 |
+
|
196 |
+
def cal_scale_mat(self, img_hw, intrinsics, extrinsics, near_fars, factor=1.):
    """Derive the scale matrix from the bounding box of the given views.

    ``get_boundingbox`` supplies a centre and a radius; the radius is
    multiplied by ``factor``. The returned 4x4 float32 matrix has the
    scaled radius on its first three diagonal entries and the centre in
    its last column.

    Returns:
        tuple: ``(scale_mat, 1 / radius)``, with ``radius`` already scaled
        by ``factor``.
    """
    bbox_center, bbox_radius, _ = get_boundingbox(img_hw, intrinsics, extrinsics, near_fars)
    bbox_radius = bbox_radius * factor

    mat = np.diag([bbox_radius, bbox_radius, bbox_radius, 1.0])
    mat[:3, 3] = bbox_center.cpu().numpy()
    mat = mat.astype(np.float32)

    inv_radius = 1. / bbox_radius.cpu().numpy()
    return mat, inv_radius
|
204 |
+
|
205 |
+
def __len__(self):
    """Return the number of entries in ``self.metas``."""
    metas = self.metas
    return len(metas)
|
207 |
+
|
208 |
+
def __getitem__(self, idx):
    """Assemble one multi-view sample (reference view plus source views).

    Changes from the previous version: the leftover debug ``print`` of the
    near/far list is removed, ``partial_vol_origin`` is copied with
    ``clone().detach()`` instead of re-wrapping a tensor in
    ``torch.tensor``, and unused locals are dropped.

    Args:
        idx: sample index; wrapped modulo ``len(self.metas)``.

    Returns:
        dict: images, depths, masks, camera matrices, near/far bounds,
        bookkeeping fields, the ``query_*`` entries taken from the
        reference view, and the generated ``rays``.
    """
    sample = {}
    scan, light_idx, ref_view, src_views = self.metas[idx % len(self.metas)]

    # generalized, load some images at once
    view_ids = [ref_view] + src_views[:self.n_views]
    # * transform from world system to camera system
    w2c_ref = self.all_extrinsics[self.remap[ref_view]]
    w2c_ref_inv = np.linalg.inv(w2c_ref)

    image_perm = 0  # only supervised on reference view

    imgs, depths_h, masks_h = [], [], []  # full size (640, 512)
    intrinsics, w2cs, near_fars = [], [], []  # record proj mats between views
    mask_dilated = None
    for i, vid in enumerate(view_ids):
        # NOTE that the id in image file names is from 1 to 49 (not 0~48)
        img_filename = os.path.join(self.root_dir,
                                    f'Rectified/{scan}_train/rect_{vid + 1:03d}_{light_idx}_r5000.png')
        depth_filename = os.path.join(self.root_dir,
                                      f'Depths/{scan}_train/depth_map_{vid:04d}.pfm')
        mask_filename = os.path.join(self.root_dir,
                                     f'Masks_clean_dilated/{scan}_train/mask_{vid:04d}.png')

        img = Image.open(img_filename)
        img_wh = np.round(np.array(img.size) * self.downSample).astype('int')
        img = img.resize(img_wh, Image.BILINEAR)

        if os.path.exists(mask_filename) and self.clean_image:
            _, mask_h = self.read_mask(mask_filename)
        else:
            # No mask available (or masking disabled): treat every pixel as valid.
            mask_h = np.ones([img_wh[1], img_wh[0]])
        masks_h.append(mask_h)

        if i == 0:
            # Dilated mask of the reference view, used by the random ray sampler.
            kernel_size = 101  # default 101
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
            mask_dilated = np.float32(cv2.dilate(np.uint8(mask_h * 255), kernel, iterations=1) > 128)

        if self.clean_image:
            img = np.array(img)
            img[mask_h < 0.5] = 0.0

        img = self.transform(img)

        imgs += [img]

        index_mat = self.remap[vid]
        near_fars.append(self.all_near_fars[index_mat])
        intrinsics.append(self.all_intrinsics[index_mat])

        # Express every camera relative to the reference camera.
        w2cs.append(self.all_extrinsics[index_mat] @ w2c_ref_inv)

        # NOTE(review): a view without a depth file contributes no depth
        # entry, and the zip() below then silently stops at the shortest
        # list -- confirm that every view always has a depth file.
        if os.path.exists(depth_filename):  # and i == 0
            _, depth_h = self.read_depth(depth_filename)
            depths_h.append(depth_h)

    # ! estimate scale_mat
    scale_mat, scale_factor = self.cal_scale_mat(img_hw=[img_wh[1], img_wh[0]],
                                                 intrinsics=intrinsics, extrinsics=w2cs,
                                                 near_fars=near_fars, factor=1.1)

    # ! calculate the new w2cs after scaling
    new_near_fars = []
    new_w2cs = []
    new_c2ws = []
    new_affine_mats = []
    new_depths_h = []
    for intrinsic, extrinsic, depth in zip(intrinsics, w2cs, depths_h):
        P = intrinsic @ extrinsic @ scale_mat
        P = P[:3, :4]
        # - should use load_K_Rt_from_P() to obtain c2w
        c2w = load_K_Rt_from_P(None, P)[1]
        w2c = np.linalg.inv(c2w)
        new_w2cs.append(w2c)
        new_c2ws.append(c2w)
        affine_mat = np.eye(4)
        affine_mat[:3, :4] = intrinsic[:3, :3] @ w2c[:3, :4]
        new_affine_mats.append(affine_mat)

        # Near/far come from the camera's distance to the origin, +/- 1,
        # widened by 5% on each side.
        camera_o = c2w[:3, 3]
        dist = np.sqrt(np.sum(camera_o ** 2))
        near = dist - 1
        far = dist + 1

        new_near_fars.append([0.95 * near, 1.05 * far])
        new_depths_h.append(depth * scale_factor)

    imgs = torch.stack(imgs).float()
    depths_h = np.stack(new_depths_h)
    masks_h = np.stack(masks_h)

    affine_mats = np.stack(new_affine_mats)
    intrinsics, w2cs, c2ws, near_fars = np.stack(intrinsics), np.stack(new_w2cs), np.stack(new_c2ws), np.stack(
        new_near_fars)

    # Outside training the reference view is dropped from the per-view
    # tensors; it is still exposed through the query_* entries.
    if 'train' in self.split:
        start_idx = 0
    else:
        start_idx = 1

    sample['images'] = imgs  # (V, 3, H, W)
    sample['depths_h'] = torch.from_numpy(depths_h.astype(np.float32))  # (V, H, W)
    sample['masks_h'] = torch.from_numpy(masks_h.astype(np.float32))  # (V, H, W)
    sample['w2cs'] = torch.from_numpy(w2cs.astype(np.float32))  # (V, 4, 4)
    sample['c2ws'] = torch.from_numpy(c2ws.astype(np.float32))  # (V, 4, 4)
    sample['near_fars'] = torch.from_numpy(near_fars.astype(np.float32))  # (V, 2)
    sample['intrinsics'] = torch.from_numpy(intrinsics.astype(np.float32))[:, :3, :3]  # (V, 3, 3)
    sample['view_ids'] = torch.from_numpy(np.array(view_ids))
    sample['affine_mats'] = torch.from_numpy(affine_mats.astype(np.float32))  # ! in world space

    sample['light_idx'] = torch.tensor(light_idx)
    sample['scan'] = scan

    sample['scale_factor'] = torch.tensor(scale_factor)
    sample['img_wh'] = torch.from_numpy(img_wh)
    sample['render_img_idx'] = torch.tensor(image_perm)
    # clone().detach() is the recommended way to copy an existing tensor.
    sample['partial_vol_origin'] = self.partial_vol_origin.clone().detach().to(torch.float32)
    sample['meta'] = str(scan) + "_light" + str(light_idx) + "_refview" + str(ref_view)

    # - image to render
    sample['query_image'] = sample['images'][0]
    sample['query_c2w'] = sample['c2ws'][0]
    sample['query_w2c'] = sample['w2cs'][0]
    sample['query_intrinsic'] = sample['intrinsics'][0]
    sample['query_depth'] = sample['depths_h'][0]
    sample['query_mask'] = sample['masks_h'][0]
    sample['query_near_far'] = sample['near_fars'][0]

    sample['images'] = sample['images'][start_idx:]  # (V, 3, H, W)
    sample['depths_h'] = sample['depths_h'][start_idx:]  # (V, H, W)
    sample['masks_h'] = sample['masks_h'][start_idx:]  # (V, H, W)
    sample['w2cs'] = sample['w2cs'][start_idx:]  # (V, 4, 4)
    sample['c2ws'] = sample['c2ws'][start_idx:]  # (V, 4, 4)
    sample['intrinsics'] = sample['intrinsics'][start_idx:]  # (V, 3, 3)
    sample['view_ids'] = sample['view_ids'][start_idx:]
    sample['affine_mats'] = sample['affine_mats'][start_idx:]  # ! in world space

    sample['scale_mat'] = torch.from_numpy(scale_mat)
    sample['trans_mat'] = torch.from_numpy(w2c_ref_inv)

    # - generate rays
    if ('val' in self.split) or ('test' in self.split):
        sample_rays = gen_rays_from_single_image(
            img_wh[1], img_wh[0],
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None)
    else:
        sample_rays = gen_random_rays_from_single_image(
            img_wh[1], img_wh[0],
            self.N_rays,
            sample['query_image'],
            sample['query_intrinsic'],
            sample['query_c2w'],
            depth=sample['query_depth'],
            mask=sample['query_mask'] if self.clean_image else None,
            dilated_mask=mask_dilated,
            importance_sample=self.importance_sample)

    sample['rays'] = sample_rays

    return sample
|
SparseNeuS_demo_v1/data/scene.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import pdb
|
4 |
+
|
5 |
+
|
6 |
+
def rigid_transform(xyz, transform):
    """Apply a 4x4 transform (c2w) to an (N, 3) point cloud.

    The points are lifted to homogeneous coordinates, multiplied by
    ``transform``, and the first three coordinates are returned as (N, 3).
    """
    n_pts = len(xyz)
    ones_col = torch.ones((n_pts, 1)).to(xyz.device)
    homog = torch.cat([xyz, ones_col], dim=1)  # (N, 4)
    moved = torch.matmul(transform, homog.T).T
    return moved[:, :3]
|
14 |
+
|
15 |
+
|
16 |
+
def get_view_frustum(min_depth, max_depth, size, cam_intr, c2w):
    """Return the eight corners of a camera's view frustum as a (3, 8) tensor.

    The four image corners are back-projected through ``cam_intr`` at
    ``min_depth`` and again at ``max_depth``, then moved by ``c2w`` using
    ``rigid_transform``.

    Args:
        min_depth: near depth.
        max_depth: far depth.
        size: ``(height, width)`` of the image.
        cam_intr: intrinsic matrix; only the focal lengths and principal
            point entries are read.
        c2w: 4x4 transform applied to the corner points.
    """
    device = cam_intr.device
    im_h, im_w = size
    im_h = int(im_h)
    im_w = int(im_w)

    # Pixel coordinates of the corners, listed for the near plane first.
    corner_us = torch.tensor([0, 0, im_w, im_w, 0, 0, im_w, im_w]).to(device)
    corner_vs = torch.tensor([0, im_h, 0, im_h, 0, im_h, 0, im_h]).to(device)
    corner_depths = torch.tensor([min_depth] * 4 + [max_depth] * 4).to(device)

    cam_x = (corner_us - cam_intr[0, 2]) * corner_depths / cam_intr[0, 0]
    cam_y = (corner_vs - cam_intr[1, 2]) * corner_depths / cam_intr[1, 1]

    frustum = torch.stack([cam_x, cam_y, corner_depths]).type(torch.float32)
    pose = c2w.type(torch.float32)
    return rigid_transform(frustum.T, pose).T
|
37 |
+
|
38 |
+
|
39 |
+
def set_pixel_coords(h, w):
    """Return homogeneous pixel coordinates as a float32 [1, 3, H, W] tensor.

    Channel 0 holds the column index, channel 1 the row index and
    channel 2 is all ones.
    """
    rows = torch.arange(0, h).view(1, h, 1).expand(1, h, w).float()  # [1, H, W]
    cols = torch.arange(0, w).view(1, 1, w).expand(1, h, w).float()  # [1, H, W]
    unit = torch.ones(1, h, w).float()
    return torch.stack((cols, rows, unit), dim=1)  # [1, 3, H, W]
|
47 |
+
|
48 |
+
|
49 |
+
def get_boundingbox(img_hw, intrinsics, extrinsics, near_fars):
    """Compute the axis-aligned box enclosing the frustums of all views.

    Changes from the previous version: the per-view frustum list and its
    concatenation were never used and are removed, the duplicated matrix
    inverse is merged, and an empty set of views is rejected explicitly
    (it previously failed inside ``torch.cat``).

    Args:
        img_hw: ``(height, width)`` of the images.
        intrinsics: list or stacked array/tensor of intrinsic matrices.
        extrinsics: matching world-to-camera matrices.
        near_fars: per-view ``(near, far)`` depth bounds.

    Returns:
        tuple: ``(center, radius, bnds)`` where ``bnds`` is a (3, 2) tensor
        of per-axis min/max, ``center`` is the box midpoint and ``radius``
        is half of the longest box side.

    Raises:
        ValueError: if no views are given.
    """
    num = len(intrinsics) if isinstance(intrinsics, list) else intrinsics.shape[0]
    if num == 0:
        raise ValueError('get_boundingbox needs at least one view')

    bnds = torch.zeros((3, 2))
    bnds[:, 0] = np.inf
    bnds[:, 1] = -np.inf

    for i in range(num):
        cam_intr = intrinsics[i]
        w2c = extrinsics[i]
        if not isinstance(cam_intr, torch.Tensor):
            cam_intr = torch.tensor(cam_intr)
            w2c = torch.tensor(w2c)
        c2w = torch.inverse(w2c)
        min_depth, max_depth = near_fars[i][0], near_fars[i][1]
        # todo: check the corresponding points are matched

        view_frust_pts = get_view_frustum(min_depth, max_depth, img_hw, cam_intr, c2w)
        bnds[:, 0] = torch.min(bnds[:, 0], torch.min(view_frust_pts, dim=1)[0])
        bnds[:, 1] = torch.max(bnds[:, 1], torch.max(view_frust_pts, dim=1)[0])

    center = (bnds[:, 1] + bnds[:, 0]) / 2

    lengths = bnds[:, 1] - bnds[:, 0]
    max_length, _ = torch.max(lengths, dim=0)
    radius = max_length / 2

    return center, radius, bnds
|
SparseNeuS_demo_v1/evaluation/__init__.py
ADDED
File without changes
|
SparseNeuS_demo_v1/evaluation/clean_mesh.py
ADDED
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import cv2 as cv
|
3 |
+
import os
|
4 |
+
from glob import glob
|
5 |
+
from scipy.io import loadmat
|
6 |
+
import trimesh
|
7 |
+
import open3d as o3d
|
8 |
+
import torch
|
9 |
+
from tqdm import tqdm
|
10 |
+
|
11 |
+
import sys
|
12 |
+
|
13 |
+
sys.path.append("../")
|
14 |
+
|
15 |
+
|
16 |
+
def gen_rays_from_single_image(H, W, image, intrinsic, c2w, depth=None, mask=None):
    """Generate one ray per pixel for a single image.

    Fix: the matmul results are squeezed along the last axis only. A bare
    ``squeeze()`` also removed the ray axis when the image has a single
    pixel, which made the following indexing fail.

    Args:
        H: image height in pixels.
        W: image width in pixels.
        image: (3, H, W) tensor; its device is used for the outputs.
        intrinsic: camera intrinsic matrix; the top-left 3x3 of its inverse
            is used.
        c2w: 4x4 camera-to-world matrix.
        depth: optional depth map, flattened to (H*W, 1).
        mask: optional mask, flattened to (H*W, 1); all ones when omitted.

    Returns:
        dict: ``rays_o``, ``rays_v``, ``rays_ndc_uv``, ``rays_color``,
        ``rays_mask``, ``rays_norm_XYZ_cam``, plus ``rays_depth`` when
        ``depth`` is given.
    """
    device = image.device
    ys, xs = torch.meshgrid(torch.linspace(0, H - 1, H),
                            torch.linspace(0, W - 1, W))  # pytorch's meshgrid has indexing='ij'
    p = torch.stack([xs, ys, torch.ones_like(ys)], dim=-1)  # H, W, 3

    # normalized ndc uv coordinates, (-1, 1)
    ndc_u = 2 * xs / (W - 1) - 1
    ndc_v = 2 * ys / (H - 1) - 1
    rays_ndc_uv = torch.stack([ndc_u, ndc_v], dim=-1).view(-1, 2).float().to(device)

    intrinsic_inv = torch.inverse(intrinsic)

    p = p.view(-1, 3).float().to(device)  # N_rays, 3
    # squeeze(-1) keeps the ray axis even when N_rays == 1.
    p = torch.matmul(intrinsic_inv[None, :3, :3], p[:, :, None]).squeeze(-1)  # N_rays, 3
    rays_v = p / torch.linalg.norm(p, ord=2, dim=-1, keepdim=True)  # N_rays, 3
    rays_v = torch.matmul(c2w[None, :3, :3], rays_v[:, :, None]).squeeze(-1)  # N_rays, 3
    rays_o = c2w[None, :3, 3].expand(rays_v.shape)  # N_rays, 3

    image = image.permute(1, 2, 0)
    color = image.view(-1, 3)
    depth = depth.view(-1, 1) if depth is not None else None
    mask = mask.view(-1, 1) if mask is not None else torch.ones([H * W, 1]).to(device)
    sample = {
        'rays_o': rays_o,
        'rays_v': rays_v,
        'rays_ndc_uv': rays_ndc_uv,
        'rays_color': color,
        'rays_mask': mask,
        'rays_norm_XYZ_cam': p,  # - XYZ_cam, before multiply depth
    }
    if depth is not None:
        sample['rays_depth'] = depth

    return sample
|
60 |
+
|
61 |
+
|
62 |
+
def load_K_Rt_from_P(filename, P=None):
    """Decompose a projection matrix into intrinsics and a camera pose.

    Fix: the file is now read inside a ``with`` block, so the handle is
    closed instead of being left to the garbage collector.

    Args:
        filename: text file holding the matrix rows; only read when ``P``
            is None. A 4-line file has its first line skipped.
        P: optional projection matrix; when given, ``filename`` is ignored.

    Returns:
        tuple: ``(intrinsics, pose)`` -- a 4x4 matrix whose top-left 3x3 is
        K normalised so that ``K[2, 2] == 1``, and a 4x4 float32
        camera-to-world matrix.
    """
    if P is None:
        with open(filename) as f:
            lines = f.read().splitlines()
        if len(lines) == 4:
            lines = lines[1:]
        lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
        P = np.asarray(lines).astype(np.float32).squeeze()

    out = cv.decomposeProjectionMatrix(P)
    K = out[0]
    R = out[1]
    t = out[2]

    K = K / K[2, 2]
    intrinsics = np.eye(4)
    intrinsics[:3, :3] = K

    pose = np.eye(4, dtype=np.float32)
    # R maps world to camera; its transpose is the rotation of the
    # camera-to-world pose.
    pose[:3, :3] = R.transpose()
    # t is homogeneous; dividing by its last entry gives the 3-vector.
    pose[:3, 3] = (t[:3] / t[3])[:, 0]

    return intrinsics, pose  # ! return cam2world matrix here
|
84 |
+
|
85 |
+
|
86 |
+
def clean_points_by_mask(points, scan, imgs_idx=None, minimal_vis=0, mask_dilated_size=11):
    """Flag the points that project inside enough dilated object masks.

    Each point is projected into every selected view with that view's
    ``world_mat`` and looked up in the view's mask, dilated with an
    elliptical kernel of size ``mask_dilated_size``. The mask is padded by
    a one-pixel border of ones and the code assumes 1200x1600 images.

    Reads the module-level ``DTU_DIR`` for the camera and mask locations.

    Args:
        points: (N, 3) array of points.
        scan: scan number; selects the data folder and the default view
            count (49 below scan 83, otherwise 64).
        imgs_idx: view indices to use; all views when None.
        minimal_vis: a point is kept when it is inside the mask in more
            than this many views.
        mask_dilated_size: dilation kernel size in pixels.

    Returns:
        Boolean array of length N.
    """
    cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan))
    mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan)))
    n_images = 49 if scan < 83 else 64
    hit_count = np.zeros(len(points))

    if imgs_idx is None:
        imgs_idx = list(range(n_images))

    kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (mask_dilated_size, mask_dilated_size))
    border_row = np.ones([1, 1600])
    border_col = np.ones([1202, 1])

    for view in imgs_idx:
        P = cameras['world_mat_{}'.format(view)]
        proj = np.matmul(P[None, :3, :3], points[:, :, None]).squeeze() + P[None, :3, 3]
        proj = proj / proj[:, 2:]
        # +1 shifts into the padded mask's coordinate frame.
        pix = np.round(proj).astype(np.int32) + 1
        px, py = pix[:, 0], pix[:, 1]

        view_mask = cv.imread(mask_lis[view])
        view_mask = cv.dilate(view_mask, kernel, iterations=1)
        view_mask = (view_mask[:, :, 0] > 128)

        view_mask = np.concatenate([border_row, view_mask, border_row], axis=0)
        view_mask = np.concatenate([border_col, view_mask, border_col], axis=1)

        in_bounds = (px >= 0) & (px <= 1600) & (py >= 0) & (py <= 1200)
        sampled = view_mask[(py.clip(0, 1201), px.clip(0, 1601))]

        hit_count += sampled.astype(np.float32) * in_bounds

    return hit_count > minimal_vis
|
120 |
+
|
121 |
+
|
122 |
+
def clean_mesh_faces_by_mask(mesh_file, new_mesh_file, scan, imgs_idx, minimal_vis=0, mask_dilated_size=11):
    """Drop mesh vertices outside the object masks and export the result.

    A face is kept only when all three of its vertices pass
    ``clean_points_by_mask``; surviving vertices are renumbered.

    Fix: ``np.long`` no longer exists in current NumPy, so the index table
    is built with ``np.int64``.

    Args:
        mesh_file: path of the mesh to load.
        new_mesh_file: path the cleaned mesh is exported to.
        scan: scan number, forwarded to ``clean_points_by_mask``.
        imgs_idx: view indices, forwarded to ``clean_points_by_mask``.
        minimal_vis: visibility threshold, forwarded likewise.
        mask_dilated_size: dilation kernel size, forwarded likewise.
    """
    old_mesh = trimesh.load(mesh_file)
    old_vertices = old_mesh.vertices[:]
    old_faces = old_mesh.faces[:]
    mask = clean_points_by_mask(old_vertices, scan, imgs_idx, minimal_vis, mask_dilated_size)

    # Old vertex id -> new vertex id (-1 for removed vertices).
    indexes = np.full(len(old_vertices), -1, dtype=np.int64)
    indexes[mask] = np.arange(np.count_nonzero(mask))

    faces_mask = mask[old_faces[:, 0]] & mask[old_faces[:, 1]] & mask[old_faces[:, 2]]
    new_faces = indexes[old_faces[faces_mask]]
    new_vertices = old_vertices[mask]

    new_mesh = trimesh.Trimesh(new_vertices, new_faces)

    new_mesh.export(new_mesh_file)
|
141 |
+
|
142 |
+
|
143 |
+
def clean_mesh_by_faces_num(mesh, faces_num=500):
    """Return a copy of ``mesh`` without small connected components.

    Components are found on the face-adjacency graph; a component is kept
    when it has at least ``faces_num`` faces. Vertices that no kept face
    references are dropped and the faces are renumbered.

    Fixes:
      * the component mask is per *face*, but it was used to index
        *vertices*, which gave wrong results or an IndexError;
      * ``np.bool`` / ``np.long`` no longer exist in current NumPy;
      * an empty component list no longer breaks ``np.concatenate``.

    Args:
        mesh: mesh to filter; it is not modified.
        faces_num: minimum number of faces a component needs to be kept.

    Returns:
        A new ``trimesh.Trimesh`` holding only the kept components.
    """
    old_vertices = np.asarray(mesh.vertices)
    old_faces = np.asarray(mesh.faces)

    cc = trimesh.graph.connected_components(mesh.face_adjacency, min_len=faces_num)
    face_mask = np.zeros(len(old_faces), dtype=bool)
    if len(cc) > 0:
        face_mask[np.concatenate(cc)] = True

    kept_faces = old_faces[face_mask]
    used_vertices = np.unique(kept_faces)

    # Old vertex id -> new vertex id (-1 for removed vertices).
    indexes = np.full(len(old_vertices), -1, dtype=np.int64)
    indexes[used_vertices] = np.arange(len(used_vertices))

    new_faces = indexes[kept_faces]
    new_vertices = old_vertices[used_vertices]

    new_mesh = trimesh.Trimesh(new_vertices, new_faces)

    return new_mesh
|
165 |
+
|
166 |
+
|
167 |
+
def clean_mesh_faces_outside_frustum(old_mesh_file, new_mesh_file, imgs_idx, H=1200, W=1600, mask_dilated_size=11,
                                     isolated_face_num=500, keep_largest=True):
    '''Remove mesh faces that no selected camera observes.

    Rays are cast through every pixel of each view's dilated mask; a face
    is kept when it is the first hit of at least one ray. Small connected
    components are then removed and the result is exported.

    NOTE(review): this function reads the module-level names ``DTU_DIR``
    and ``scan``; neither is a parameter, so it only works when the caller
    has set both as globals.

    Fixes:
      * hit faces are selected with ``values >= 0`` instead of dropping
        the first unique value, which discarded a real face whenever
        every ray hit the mesh (no -1 miss entry present);
      * ``np.bool`` no longer exists in current NumPy;
      * ``isolated_face_num`` is honoured instead of a hard-coded 500
        (same default);
      * an empty component list no longer breaks ``np.concatenate``.

    Args:
        old_mesh_file: path of the mesh to clean.
        new_mesh_file: path for the cleaned mesh; the version before
            component filtering is also written with a ``_raw.ply`` suffix.
        imgs_idx: indices of the views to cast rays from.
        H: image height used for ray generation.
        W: image width used for ray generation.
        mask_dilated_size: dilation kernel size in pixels.
        isolated_face_num: minimum face count of a kept component.
        keep_largest: currently unused.
    '''
    cameras = np.load('{}/scan{}/cameras.npz'.format(DTU_DIR, scan))
    mask_lis = sorted(glob('{}/scan{}/mask/*.png'.format(DTU_DIR, scan)))

    mesh = trimesh.load(old_mesh_file)
    intersector = trimesh.ray.ray_pyembree.RayMeshIntersector(mesh)

    all_indices = []
    chunk_size = 5120
    for i in imgs_idx:
        mask_image = cv.imread(mask_lis[i])
        kernel_size = mask_dilated_size  # default 101
        kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (kernel_size, kernel_size))
        mask_image = cv.dilate(mask_image, kernel, iterations=1)

        P = cameras['world_mat_{}'.format(i)]

        intrinsic, pose = load_K_Rt_from_P(None, P[:3, :])

        # The mask is passed as the "image", so rays_color carries the
        # mask value of every ray.
        rays = gen_rays_from_single_image(H, W, torch.from_numpy(mask_image).permute(2, 0, 1).float(),
                                          torch.from_numpy(intrinsic)[:3, :3].float(),
                                          torch.from_numpy(pose).float())
        rays_o = rays['rays_o']
        rays_d = rays['rays_v']
        rays_mask = rays['rays_color']

        rays_o = rays_o.split(chunk_size)
        rays_d = rays_d.split(chunk_size)
        rays_mask = rays_mask.split(chunk_size)

        for rays_o_batch, rays_d_batch, rays_mask_batch in tqdm(zip(rays_o, rays_d, rays_mask)):
            rays_mask_batch = rays_mask_batch[:, 0] > 128
            rays_o_batch = rays_o_batch[rays_mask_batch]
            rays_d_batch = rays_d_batch[rays_mask_batch]

            idx_faces_hits = intersector.intersects_first(rays_o_batch.cpu().numpy(), rays_d_batch.cpu().numpy())
            all_indices.append(idx_faces_hits)

    values = np.unique(np.concatenate(all_indices, axis=0))
    # Negative entries mark rays that hit nothing.
    hit_faces = values[values >= 0]
    mask_faces = np.ones(len(mesh.faces))
    mask_faces[hit_faces] = 0
    print(f'Surfaces/Kept: {len(mesh.faces)}/{len(hit_faces)}')

    mesh_o3d = o3d.io.read_triangle_mesh(old_mesh_file)
    print("removing triangles by mask")
    mesh_o3d.remove_triangles_by_mask(mask_faces)

    o3d.io.write_triangle_mesh(new_mesh_file, mesh_o3d)

    # # clean meshes
    new_mesh = trimesh.load(new_mesh_file)
    cc = trimesh.graph.connected_components(new_mesh.face_adjacency, min_len=isolated_face_num)
    mask = np.zeros(len(new_mesh.faces), dtype=bool)
    if len(cc) > 0:
        mask[np.concatenate(cc)] = True
    new_mesh.update_faces(mask)
    new_mesh.remove_unreferenced_vertices()
    new_mesh.export(new_mesh_file)

    o3d.io.write_triangle_mesh(new_mesh_file.replace(".ply", "_raw.ply"), mesh_o3d)
    print("finishing removing triangles")
|
244 |
+
|
245 |
+
|
246 |
+
def clean_outliers(old_mesh_file, new_mesh_file):
    """Keep only the connected piece with the most faces and export it.

    The mesh is split into components (watertight or not); the first
    component with the highest face count is written to ``new_mesh_file``.
    """
    source_mesh = trimesh.load(old_mesh_file)
    pieces = source_mesh.split(only_watertight=False)

    face_counts = [len(piece.faces) for piece in pieces]
    largest = pieces[np.argmax(face_counts)]

    largest.export(new_mesh_file)
|
253 |
+
|
254 |
+
|
255 |
+
if __name__ == "__main__":
    # NOTE: DTU_DIR and the loop variable `scan` must stay module-level
    # names -- the cleaning functions above read them as globals.
    DTU_DIR = "/home/xiaoxiao/dataset/DTU_IDR/DTU"
    # DTU_DIR = "/userhome/cs/xxlong/dataset/DTU_IDR/DTU"

    base_path = "/home/xiaoxiao/Workplace/nerf_reconstruction/Volume_NeuS/neus_camsys/exp/dtu/evaluation_23_24_33_new/volsdf"

    scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122]

    # Views used for cleaning; other sets tried: [42, 43, 44], [1, 8, 9]
    imgs_idx = [0, 1, 2]

    mask_kernel_size = 11

    for scan in scans:
        print("processing scan%d" % scan)
        dir_path = os.path.join(base_path, "scan%d" % scan)

        # The first .ply found in the scan folder is the mesh to clean.
        old_mesh_file = glob(os.path.join(dir_path, "*.ply"))[0]

        clean_mesh_file = os.path.join(dir_path, "clean_%03d.ply" % scan)
        final_mesh_file = os.path.join(dir_path, "final_%03d.ply" % scan)

        # Stage 1: drop vertices outside the masks; stage 2: drop faces
        # that no selected camera sees.
        clean_mesh_faces_by_mask(old_mesh_file, clean_mesh_file, scan, imgs_idx, minimal_vis=1,
                                 mask_dilated_size=mask_kernel_size)
        clean_mesh_faces_outside_frustum(clean_mesh_file, final_mesh_file, imgs_idx,
                                         mask_dilated_size=mask_kernel_size)

        print("finish processing scan%d" % scan)
|
SparseNeuS_demo_v1/evaluation/eval_dtu_python.py
ADDED
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import open3d as o3d
|
3 |
+
import sklearn.neighbors as skln
|
4 |
+
from tqdm import tqdm
|
5 |
+
from scipy.io import loadmat
|
6 |
+
import multiprocessing as mp
|
7 |
+
import argparse, os, sys
|
8 |
+
import cv2 as cv
|
9 |
+
|
10 |
+
from pathlib import Path
|
11 |
+
|
12 |
+
|
13 |
+
def get_path_components(path):
    """Split *path* into its parent directory, file stem and extension.

    Returns:
        A ``(parent, stem, suffix)`` tuple of strings, where ``suffix``
        keeps its leading dot (or is empty when there is no extension).
    """
    location = Path(path)
    return str(location.parent), str(location.stem), str(location.suffix)
|
19 |
+
|
20 |
+
|
21 |
+
def sample_single_tri(input_):
    """Sample points on a regular grid inside a single triangle.

    Args:
        input_: tuple ``(n1, n2, v1, v2, tri_vert)`` where ``n1``/``n2`` are
            the number of grid steps along the two edge vectors ``v1``/``v2``
            and ``tri_vert`` is the triangle corner the edges start from.
            The vectors are combined by broadcasting, so any shapes that
            broadcast against an ``(m, 1)`` coefficient column work.

    Returns:
        Array of the sampled points ``tri_vert + a * v1 + b * v2`` for every
        grid coefficient pair ``(a, b)`` with ``a + b < 1``.
    """
    n1, n2, v1, v2, tri_vert = input_
    # np.mgrid yields an integer grid when n1/n2 are Python ints, which makes
    # the in-place half-cell shift below fail; force a float grid so both
    # integer and floating-point step counts are accepted.
    c = np.mgrid[:n1 + 1, :n2 + 1].astype(np.float64)
    c += 0.5  # sample at cell centres rather than on the triangle edges
    # Guard against division by zero when an edge gets zero steps; the
    # resulting huge coefficients are rejected by the a + b < 1 test below.
    c[0] /= max(n1, 1e-7)
    c[1] /= max(n2, 1e-7)
    c = np.transpose(c, (1, 2, 0))
    k = c[c.sum(axis=-1) < 1]  # (m, 2) coefficient pairs inside the triangle
    q = v1 * k[:, :1] + v2 * k[:, 1:] + tri_vert
    return q
|
31 |
+
|
32 |
+
|
33 |
+
def write_vis_pcd(file, points, colors):
    """Write *points* with per-point *colors* to *file* as an Open3D point cloud."""
    as_vec3 = o3d.utility.Vector3dVector
    cloud = o3d.geometry.PointCloud()
    cloud.points = as_vec3(points)
    cloud.colors = as_vec3(colors)
    o3d.io.write_point_cloud(file, cloud)
|
38 |
+
|
39 |
+
|
40 |
+
def eval_cloud(args, num_cpu_cores=-1):
    """Score a reconstructed mesh or point cloud against a ground-truth cloud.

    The reconstruction is converted to a point cloud (triangle interiors are
    sampled in ``'mesh'`` mode), greedily thinned with radius
    ``args.downsample_density``, restricted to the observation mask stored
    under ``args.dataset_dir`` and compared with the ground-truth cloud in
    both directions using nearest-neighbour distances.

    Side effects: creates ``args.vis_out_dir``, writes two colour-coded error
    clouds into it, prints the metrics and appends one line to the log file
    (``args.log`` or ``eval_result.txt`` next to ``args.data``).

    Args:
        args: namespace with the attributes ``data``, ``gt``, ``scan``,
            ``mode`` (``'mesh'`` or ``'pcd'``), ``dataset_dir``,
            ``vis_out_dir``, ``downsample_density``, ``patch_size``,
            ``max_dist``, ``visualize_threshold`` and ``log``.
        num_cpu_cores: passed as ``n_jobs`` to the nearest-neighbour search;
            a positive value also caps the size of the sampling worker pool.

    Returns:
        Tuple ``(over_all, mean_d2s, mean_s2d)``: the average of both
        directional means, the mean data-to-ground-truth distance and the
        mean ground-truth-to-data distance.

    Raises:
        ValueError: if ``args.mode`` is neither ``'mesh'`` nor ``'pcd'``.
    """
    mp.freeze_support()
    os.makedirs(args.vis_out_dir, exist_ok=True)

    thresh = args.downsample_density
    if args.mode == 'mesh':
        pbar = tqdm(total=9)
        pbar.set_description('read data mesh')
        data_mesh = o3d.io.read_triangle_mesh(args.data)

        vertices = np.asarray(data_mesh.vertices)
        triangles = np.asarray(data_mesh.triangles)
        tri_vert = vertices[triangles]

        pbar.update(1)
        pbar.set_description('sample pcd from mesh')
        v1 = tri_vert[:, 1] - tri_vert[:, 0]
        v2 = tri_vert[:, 2] - tri_vert[:, 0]
        l1 = np.linalg.norm(v1, axis=-1, keepdims=True)
        l2 = np.linalg.norm(v2, axis=-1, keepdims=True)
        area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True)
        # Degenerate triangles would divide by zero below; drop them.
        non_zero_area = (area2 > 0)[:, 0]
        l1, l2, area2, v1, v2, tri_vert = [
            arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert]
        ]
        # Per-triangle step length derived from the edge lengths and area;
        # n1/n2 are the resulting step counts along each edge.
        thr = thresh * np.sqrt(l1 * l2 / area2)
        n1 = np.floor(l1 / thr)
        n2 = np.floor(l2 / thr)

        # Honour an explicit positive core count; otherwise let the pool
        # pick its default size, as before.
        pool_size = num_cpu_cores if num_cpu_cores is not None and num_cpu_cores > 0 else None
        with mp.Pool(processes=pool_size) as mp_pool:
            new_pts = mp_pool.map(sample_single_tri,
                                  ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in
                                   range(len(n1))), chunksize=1024)

        new_pts = np.concatenate(new_pts, axis=0)
        data_pcd = np.concatenate([vertices, new_pts], axis=0)

    elif args.mode == 'pcd':
        pbar = tqdm(total=8)
        pbar.set_description('read data pcd')
        data_pcd_o3d = o3d.io.read_point_cloud(args.data)
        data_pcd = np.asarray(data_pcd_o3d.points)

    else:
        # Previously an unknown mode surfaced later as a NameError on `pbar`.
        raise ValueError(f"unsupported mode {args.mode!r}; expected 'mesh' or 'pcd'")

    pbar.update(1)
    pbar.set_description('random shuffle pcd index')
    shuffle_rng = np.random.default_rng()
    shuffle_rng.shuffle(data_pcd, axis=0)

    pbar.update(1)
    pbar.set_description('downsample pcd')
    nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=num_cpu_cores)
    nn_engine.fit(data_pcd)
    rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False)
    # Greedy thinning: every point that is still kept removes all of its
    # neighbours within `thresh` and then re-marks itself as kept.
    mask = np.ones(data_pcd.shape[0], dtype=np.bool_)
    for curr, idxs in enumerate(rnn_idxs):
        if mask[curr]:
            mask[idxs] = 0
            mask[curr] = 1
    data_down = data_pcd[mask]

    pbar.update(1)
    pbar.set_description('masking data pcd')
    obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat')
    ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']]
    BB = BB.astype(np.float32)

    # Keep points inside the bounding box enlarged by the patch margin.
    patch = args.patch_size
    inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3
    data_in = data_down[inbound]

    # Convert to voxel indices of the observation mask and keep only points
    # that fall inside the grid and on an observed voxel.
    data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32)
    grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3
    data_grid_in = data_grid[grid_inbound]
    in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_)
    data_in_obs = data_in[grid_inbound][in_obs]

    pbar.update(1)
    pbar.set_description('read STL pcd')
    stl_pcd = o3d.io.read_point_cloud(args.gt)
    stl = np.asarray(stl_pcd.points)

    pbar.update(1)
    pbar.set_description('compute data2stl')
    nn_engine.fit(stl)
    dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True)
    max_dist = args.max_dist
    # Distances at or beyond max_dist are excluded from the mean.
    mean_d2s = dist_d2s[dist_d2s < max_dist].mean()

    pbar.update(1)
    pbar.set_description('compute stl2data')
    ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P']

    # Keep ground-truth points on the positive side of the plane P.
    stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1)
    above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0
    stl_above = stl[above]

    nn_engine.fit(data_in)
    dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True)
    mean_s2d = dist_s2d[dist_s2d < max_dist].mean()

    pbar.update(1)
    pbar.set_description('visualize error')
    vis_dist = args.visualize_threshold
    R = np.array([[1, 0, 0]], dtype=np.float64)
    G = np.array([[0, 1, 0]], dtype=np.float64)
    B = np.array([[0, 0, 1]], dtype=np.float64)
    W = np.array([[1, 1, 1]], dtype=np.float64)
    # Colour code: blue = not evaluated, white-to-red = error up to vis_dist,
    # green = distance at or beyond max_dist.
    data_color = np.tile(B, (data_down.shape[0], 1))
    data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist
    data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha)
    data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G
    write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color)
    stl_color = np.tile(B, (stl.shape[0], 1))
    stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist
    stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha)
    stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G
    write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color)

    pbar.update(1)
    pbar.set_description('done')
    pbar.close()
    over_all = (mean_d2s + mean_s2d) / 2
    print(f'mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d} over_all: {over_all}; .')

    pparent, stem, ext = get_path_components(args.data)
    if args.log is None:
        path_log = os.path.join(pparent, 'eval_result.txt')
    else:
        path_log = args.log
    with open(path_log, 'a+') as fLog:
        fLog.write(f'mean_d2gt {np.round(mean_d2s, 3)} '
                   f'mean_gt2d {np.round(mean_s2d, 3)} '
                   f'Over_all {np.round(over_all, 3)} '
                   f'[{stem}] \n')

    return over_all, mean_d2s, mean_s2d
|
176 |
+
|
177 |
+
|
178 |
+
if __name__ == '__main__':
    # Batch evaluation: for every listed scan whose reconstruction exists
    # under `base_dir`, compute the two directional mean distances plus
    # precision / recall / F-score at two distance thresholds, write the
    # colour-coded error clouds and log the numbers.
    mp.freeze_support()

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default='data_in.ply')
    parser.add_argument('--gt', type=str, help='ground truth')
    parser.add_argument('--scan', type=int, default=1)
    parser.add_argument('--mode', type=str, default='mesh', choices=['mesh', 'pcd'])
    parser.add_argument('--dataset_dir', type=str, default='/dataset/dtu_official/SampleSet/MVS_Data')
    parser.add_argument('--vis_out_dir', type=str, default='.')
    parser.add_argument('--downsample_density', type=float, default=0.2)
    parser.add_argument('--patch_size', type=float, default=60)
    parser.add_argument('--max_dist', type=float, default=20)
    parser.add_argument('--visualize_threshold', type=float, default=10)
    parser.add_argument('--log', type=str, default=None)
    args = parser.parse_args()

    base_dir = "./exp"

    GT_DIR = "./gt_pcd"

    scans = [24, 37, 40, 55, 63, 65, 69, 83, 97, 105, 106, 110, 114, 118, 122]

    # Distance thresholds for precision / recall; they do not vary per scan.
    dist_thred1 = 1
    dist_thred2 = 2

    # A user-supplied --log file is shared by all scans: truncate it on the
    # first write only, then append, so earlier scans are not overwritten.
    shared_log_started = False

    for scan in scans:

        print("processing scan%d" % scan)

        # --data / --gt / --vis_out_dir / --scan are overridden per scan.
        args.data = os.path.join(base_dir, "scan{}".format(scan), "final_%03d.ply" % scan)

        if not os.path.exists(args.data):
            continue

        args.gt = os.path.join(GT_DIR, "stl%03d_total.ply" % scan)
        args.vis_out_dir = os.path.join(base_dir, "scan{}".format(scan))
        args.scan = scan
        os.makedirs(args.vis_out_dir, exist_ok=True)

        thresh = args.downsample_density

        if args.mode == 'mesh':
            pbar = tqdm(total=9)
            pbar.set_description('read data mesh')
            data_mesh = o3d.io.read_triangle_mesh(args.data)

            vertices = np.asarray(data_mesh.vertices)
            triangles = np.asarray(data_mesh.triangles)
            tri_vert = vertices[triangles]

            pbar.update(1)
            pbar.set_description('sample pcd from mesh')
            v1 = tri_vert[:, 1] - tri_vert[:, 0]
            v2 = tri_vert[:, 2] - tri_vert[:, 0]
            l1 = np.linalg.norm(v1, axis=-1, keepdims=True)
            l2 = np.linalg.norm(v2, axis=-1, keepdims=True)
            area2 = np.linalg.norm(np.cross(v1, v2), axis=-1, keepdims=True)
            # Degenerate triangles would divide by zero below; drop them.
            non_zero_area = (area2 > 0)[:, 0]
            l1, l2, area2, v1, v2, tri_vert = [
                arr[non_zero_area] for arr in [l1, l2, area2, v1, v2, tri_vert]
            ]
            thr = thresh * np.sqrt(l1 * l2 / area2)
            n1 = np.floor(l1 / thr)
            n2 = np.floor(l2 / thr)

            with mp.Pool() as mp_pool:
                new_pts = mp_pool.map(sample_single_tri,
                                      ((n1[i, 0], n2[i, 0], v1[i:i + 1], v2[i:i + 1], tri_vert[i:i + 1, 0]) for i in
                                       range(len(n1))), chunksize=1024)

            new_pts = np.concatenate(new_pts, axis=0)
            data_pcd = np.concatenate([vertices, new_pts], axis=0)

        elif args.mode == 'pcd':
            pbar = tqdm(total=8)
            pbar.set_description('read data pcd')
            data_pcd_o3d = o3d.io.read_point_cloud(args.data)
            data_pcd = np.asarray(data_pcd_o3d.points)

        pbar.update(1)
        pbar.set_description('random shuffle pcd index')
        shuffle_rng = np.random.default_rng()
        shuffle_rng.shuffle(data_pcd, axis=0)

        pbar.update(1)
        pbar.set_description('downsample pcd')
        nn_engine = skln.NearestNeighbors(n_neighbors=1, radius=thresh, algorithm='kd_tree', n_jobs=-1)
        nn_engine.fit(data_pcd)
        rnn_idxs = nn_engine.radius_neighbors(data_pcd, radius=thresh, return_distance=False)
        # Greedy thinning: every point that is still kept removes all of its
        # neighbours within `thresh` and then re-marks itself as kept.
        mask = np.ones(data_pcd.shape[0], dtype=np.bool_)
        for curr, idxs in enumerate(rnn_idxs):
            if mask[curr]:
                mask[idxs] = 0
                mask[curr] = 1
        data_down = data_pcd[mask]

        pbar.update(1)
        pbar.set_description('masking data pcd')
        obs_mask_file = loadmat(f'{args.dataset_dir}/ObsMask/ObsMask{args.scan}_10.mat')
        ObsMask, BB, Res = [obs_mask_file[attr] for attr in ['ObsMask', 'BB', 'Res']]
        BB = BB.astype(np.float32)

        # Keep points inside the bounding box enlarged by the patch margin.
        patch = args.patch_size
        inbound = ((data_down >= BB[:1] - patch) & (data_down < BB[1:] + patch * 2)).sum(axis=-1) == 3
        data_in = data_down[inbound]

        # Convert to voxel indices of the observation mask and keep only
        # points that fall inside the grid and on an observed voxel.
        data_grid = np.around((data_in - BB[:1]) / Res).astype(np.int32)
        grid_inbound = ((data_grid >= 0) & (data_grid < np.expand_dims(ObsMask.shape, 0))).sum(axis=-1) == 3
        data_grid_in = data_grid[grid_inbound]
        in_obs = ObsMask[data_grid_in[:, 0], data_grid_in[:, 1], data_grid_in[:, 2]].astype(np.bool_)
        data_in_obs = data_in[grid_inbound][in_obs]

        pbar.update(1)
        pbar.set_description('read STL pcd')
        stl_pcd = o3d.io.read_point_cloud(args.gt)
        stl = np.asarray(stl_pcd.points)

        pbar.update(1)
        pbar.set_description('compute data2stl')
        nn_engine.fit(stl)
        dist_d2s, idx_d2s = nn_engine.kneighbors(data_in_obs, n_neighbors=1, return_distance=True)
        max_dist = args.max_dist
        # Distances at or beyond max_dist are excluded from the mean.
        mean_d2s = dist_d2s[dist_d2s < max_dist].mean()

        # Fraction of evaluated data points closer than each threshold.
        precision_1 = len(dist_d2s[dist_d2s < dist_thred1]) / len(dist_d2s)
        precision_2 = len(dist_d2s[dist_d2s < dist_thred2]) / len(dist_d2s)

        pbar.update(1)
        pbar.set_description('compute stl2data')
        ground_plane = loadmat(f'{args.dataset_dir}/ObsMask/Plane{args.scan}.mat')['P']

        # Keep ground-truth points on the positive side of the plane P.
        stl_hom = np.concatenate([stl, np.ones_like(stl[:, :1])], -1)
        above = (ground_plane.reshape((1, 4)) * stl_hom).sum(-1) > 0

        stl_above = stl[above]

        nn_engine.fit(data_in)
        dist_s2d, idx_s2d = nn_engine.kneighbors(stl_above, n_neighbors=1, return_distance=True)
        mean_s2d = dist_s2d[dist_s2d < max_dist].mean()

        # Fraction of kept ground-truth points closer than each threshold.
        recall_1 = len(dist_s2d[dist_s2d < dist_thred1]) / len(dist_s2d)
        recall_2 = len(dist_s2d[dist_s2d < dist_thred2]) / len(dist_s2d)

        pbar.update(1)
        pbar.set_description('visualize error')
        vis_dist = args.visualize_threshold
        R = np.array([[1, 0, 0]], dtype=np.float64)
        G = np.array([[0, 1, 0]], dtype=np.float64)
        B = np.array([[0, 0, 1]], dtype=np.float64)
        W = np.array([[1, 1, 1]], dtype=np.float64)
        # Colour code: blue = not evaluated, white-to-red = error up to
        # vis_dist, green = distance at or beyond max_dist.
        data_color = np.tile(B, (data_down.shape[0], 1))
        data_alpha = dist_d2s.clip(max=vis_dist) / vis_dist
        data_color[np.where(inbound)[0][grid_inbound][in_obs]] = R * data_alpha + W * (1 - data_alpha)
        data_color[np.where(inbound)[0][grid_inbound][in_obs][dist_d2s[:, 0] >= max_dist]] = G
        write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_d2gt.ply', data_down, data_color)
        stl_color = np.tile(B, (stl.shape[0], 1))
        stl_alpha = dist_s2d.clip(max=vis_dist) / vis_dist
        stl_color[np.where(above)[0]] = R * stl_alpha + W * (1 - stl_alpha)
        stl_color[np.where(above)[0][dist_s2d[:, 0] >= max_dist]] = G
        write_vis_pcd(f'{args.vis_out_dir}/vis_{args.scan:03}_gt2d.ply', stl, stl_color)

        pbar.update(1)
        pbar.set_description('done')
        pbar.close()
        over_all = (mean_d2s + mean_s2d) / 2

        # The small epsilon keeps the F-score defined when both terms are 0.
        fscore_1 = 2 * precision_1 * recall_1 / (precision_1 + recall_1 + 1e-6)
        fscore_2 = 2 * precision_2 * recall_2 / (precision_2 + recall_2 + 1e-6)

        print(f'over_all: {over_all}; mean_d2gt: {mean_d2s}; mean_gt2d: {mean_s2d}.')
        print(f'precision_1mm: {precision_1}; recall_1mm: {recall_1}; fscore_1mm: {fscore_1}')
        print(f'precision_2mm: {precision_2}; recall_2mm: {recall_2}; fscore_2mm: {fscore_2}')

        pparent, stem, ext = get_path_components(args.data)
        if args.log is None:
            # Default log lives in the scan's own directory, so overwriting
            # it only replaces that scan's previous result.
            path_log = os.path.join(pparent, 'eval_result.txt')
            log_mode = 'w+'
        else:
            path_log = args.log
            log_mode = 'a+' if shared_log_started else 'w+'
            shared_log_started = True
        with open(path_log, log_mode) as fLog:
            fLog.write(f'over_all {np.round(over_all, 3)} '
                       f'mean_d2gt {np.round(mean_d2s, 3)} '
                       f'mean_gt2d {np.round(mean_s2d, 3)} \n'
                       f'precision_1mm {np.round(precision_1, 3)} '
                       f'recall_1mm {np.round(recall_1, 3)} '
                       f'fscore_1mm {np.round(fscore_1, 3)} \n'
                       f'precision_2mm {np.round(precision_2, 3)} '
                       f'recall_2mm {np.round(recall_2, 3)} '
                       f'fscore_2mm {np.round(fscore_2, 3)} \n'
                       f'[{stem}] \n')
|
SparseNeuS_demo_v1/exp/lod0/checkpoint_trash/ckpt_285000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:763c2a4934928cc089342905ba61481d6f9efc977b9729d7fc2d3eae4f0e1f9b
|
3 |
+
size 5310703
|