Image-to-3D
English
wala
mult-iview-to-3d
File size: 5,042 Bytes
4e82918
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
{
    "strategy": null,
    "gpu_workers": 8,
    "restore_path": null,
    "prefetch_factor": null,
    "matual_precision": "medium",
    "max_concurrency": 1500,
    "multipart_size": 8388608,
    "print_every": 100,
    "save_every": 100,
    "validation_every": 15000,
    "validation_every_log": 500,
    "visualization_every": 15000,
    "log_level": "info",
    "experiment_type": "max",
    "experiment_every": 5,
    "fast_dev_run": false,
    "limit_val_batches": 0.05,
    "ft_train_number": null,
    "ft_train_datasets": null,
    "val_cnt": null,
    "use_even_val": false,
    "use_ema": true,
    "ema_decay": 0.9999,
    "use_ema_weights": true,
    "use_compile": true,
    "batch_size": 8,
    "test_batch_size": 8,
    "num_workers": 23,
    "test_threshold": 0.05,
    "num_gpus": 8,
    "seed": 1,
    "epochs": 300,
    "optimizer": "Adam",
    "lr": 0.0001,
    "train_mode": "train",
    "use_local_storage": false,
    "auto_exp_name": null,
    "input_type": "Wavelet",
    "output_type": "Wavelet",
    "encoder_type": "General_Encoder_Down_2",
    "decoder_type": "General_Decoder_Up_2",
    "encoder_num_tran": 0,
    "decoder_num_tran": 0,
    "last_feature_transform": null,
    "reconstruct_loss_type": "mean",
    "quantizer_type": "original",
    "normalize_latent": null,
    "e_dim": 4,
    "n_e": 1024,
    "beta": 0.25,
    "sample_mode": "bilinear",
    "padding": 0.1,
    "gamma": 1,
    "grid_size": 12,
    "t_loss": 1.0,
    "num_latent_tokens": 256,
    "dataset_name": "all",
    "voxel_transform": null,
    "num_points": 2048,
    "num_sdf_points": 5000,
    "categories": null,
    "resolution": 256,
    "max_depth": 3,
    "max_training_level": 2,
    "point_num": 16384,
    "keep_level": 2,
    "data_keep_level": 2,
    "wavelet": "bior6.8",
    "padding_mode": "constant",
    "use_normalization": false,
    "use_shift_mean": false,
    "start_stage": 0,
    "use_adaptive_stage_update": false,
    "no_rebalance_loss": true,
    "use_compact_indices": true,
    "sample_threshold_ratio": 0.03125,
    "use_batched_threshold": true,
    "use_sample_training": true,
    "use_sample_threshold": true,
    "div_hyp": 1.0,
    "checkpoint": null,
    "use_timestamp": false,
    "num_iterations": 300000,
    "gpu": "0",
    "threshold": 0.45,
    "sampling_type": null,
    "auto_precision": "bf16",
    "gradient_clip_val": 1.0,
    "dropout": 0.0,
    "sdf_points": 20000,
    "sdf_sample_type": "mixture",
    "sdf_res": 256,
    "greater_or_no": true,
    "s3_bucket": "build3d-wavelets",
    "s3_prefix": "dataset",
    "use_s3": true,
    "wavelet_transform": "all",
    "test_file_name": "model_performance.csv",
    "network_type": "latent_uvit",
    "diffusion_beta_schedule": "cosine",
    "diffusion_step": 1000,
    "diffusion_rescale_timestep": 100,
    "diffusion_scale_ratio": 1.0,
    "diffusion_model_var_type": "FIXED_SMALL",
    "diffusion_model_mean_type": "START_X",
    "diffusion_loss_type": "MSE",
    "diffusion_sampler": "second-order",
    "dit_block_type": "cross_dit",
    "att_patch_size": 1,
    "att_hidden_size": 1152,
    "transformer_num_blocks": 32,
    "transformer_num_heads": 16,
    "transformer_add_num_register": 0,
    "unet_model_channels": 128,
    "unet_num_res_blocks": [
        3
    ],
    "learnable_skip_r": null,
    "add_condition_res_ch": 128,
    "with_fix_pos": true,
    "cond_num_mapping_layers": 0,
    "add_condition_time_ch": true,
    "add_condition_input_ch": null,
    "use_pointcloud_conditions": false,
    "pc_encoder_type": "PointNet_Simple",
    "use_pointvoxel_encoder": false,
    "num_pc_points": 2500,
    "use_pc_samples": false,
    "sample_num": 2500,
    "pc_dims": 1024,
    "num_inds": 1024,
    "pc_output_dim": 1024,
    "use_voxel_conditions": false,
    "voxel_context_dim": 1024,
    "voxel_dim": 8,
    "voxel_resolution": 16,
    "use_image_conditions": true,
    "use_camera_index": true,
    "render_resolution": 384,
    "max_images_num": 55,
    "image_transform": "dino",
    "clip_model_type": "dino-l-14_reg",
    "input_view_cnt": 4,
    "use_multiple_views_grids": true,
    "training_views": [
        3,
        6,
        10,
        26
    ],
    "testing_views": null,
    "use_depth_conditions": false,
    "use_wavelet_conditions": false,
    "dp_cond": 0.05,
    "scale": 3,
    "guidance_type": null,
    "dp_cond_type": null,
    "use_autoencoder": false,
    "checkpoint_type": "last_ft_10000",
    "pre_quant": true,
    "latent_normalization": false,
    "use_autoencoder_ema": false,
    "filter_path": "filter_list",
    "finetune": false,
    "finetune_dp_cond": null,
    "finetune_dp_cond_type": null,
    "test_log_num": null,
    "precision": "bf16",
    "gradient_clip_val_2": 1.0,
    "wavelet_transform_2": null,
    "weight_decay": 0.0,
    "opt_eps": 1e-08,
    "latent_lr": 0.0001,
    "pin_memory": false,
    "n_px": 224,
    "cond_emb_dim": 1024,
    "cond_grid_size": 256,
    "cond_grid_emb_size": 1024,
    "condition_dim": 1024,
    "num_cond_vectors": 256
}