Text-to-3D
wyysf committed on
Commit
a73431a
1 Parent(s): 500a291
image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-aligned-vae/config.yaml ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
2
+ description: ''
3
+ tag: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
4
+ seed: 0
5
+ use_timestamp: true
6
+ timestamp: ''
7
+ exp_root_dir: outputs
8
+ exp_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
9
+ trial_name: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
10
+ trial_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k/michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
11
+ n_gpus: 8
12
+ resume: ./ckpts/3DNativeGeneration/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k.ckpt
13
+ data_type: objaverse-datamodule
14
+ data:
15
+ root_dir: data/objaverse_clean/cap3d_high_quality_170k_images
16
+ data_type: occupancy
17
+ n_samples: 4096
18
+ noise_sigma: 0.0
19
+ load_supervision: false
20
+ supervision_type: occupancy
21
+ n_supervision: 10000
22
+ load_image: true
23
+ image_data_path: data/objaverse_clean/raw_data/images/cap3d_high_quality_170k
24
+ image_type: mvrgb
25
+ idx:
26
+ - 0
27
+ - 4
28
+ - 8
29
+ - 12
30
+ - 16
31
+ n_views: 4
32
+ load_caption: false
33
+ rotate_points: false
34
+ batch_size: 32
35
+ num_workers: 16
36
+ system_type: shape-diffusion-system
37
+ system:
38
+ val_samples_json: val_data/mv_images/val_samples_rgb_mvimage.json
39
+ z_scale_factor: 1.0
40
+ guidance_scale: 7.5
41
+ num_inference_steps: 50
42
+ eta: 0.0
43
+ shape_model_type: michelangelo-aligned-autoencoder
44
+ shape_model:
45
+ num_latents: 256
46
+ embed_dim: 64
47
+ point_feats: 3
48
+ out_dim: 1
49
+ num_freqs: 8
50
+ include_pi: false
51
+ heads: 12
52
+ width: 768
53
+ num_encoder_layers: 8
54
+ num_decoder_layers: 16
55
+ use_ln_post: true
56
+ init_scale: 0.25
57
+ qkv_bias: false
58
+ use_flash: true
59
+ use_checkpoint: true
60
+ condition_model_type: clip-embedder
61
+ condition_model:
62
+ pretrained_model_name_or_path: openai/clip-vit-large-patch14
63
+ encode_camera: true
64
+ camera_embeds_dim: 32
65
+ n_views: 4
66
+ empty_embeds_ratio: 0.1
67
+ normalize_embeds: false
68
+ zero_uncond_embeds: true
69
+ denoiser_model_type: simple-denoiser
70
+ denoiser_model:
71
+ input_channels: 64
72
+ output_channels: 64
73
+ n_ctx: 256
74
+ width: 768
75
+ layers: 6
76
+ heads: 12
77
+ context_dim: 1024
78
+ init_scale: 1.0
79
+ skip_ln: true
80
+ use_checkpoint: true
81
+ noise_scheduler_type: diffusers.schedulers.DDPMScheduler
82
+ noise_scheduler:
83
+ num_train_timesteps: 1000
84
+ beta_start: 0.00085
85
+ beta_end: 0.012
86
+ beta_schedule: scaled_linear
87
+ variance_type: fixed_small
88
+ clip_sample: false
89
+ denoise_scheduler_type: diffusers.schedulers.DDIMScheduler
90
+ denoise_scheduler:
91
+ num_train_timesteps: 1000
92
+ beta_start: 0.00085
93
+ beta_end: 0.012
94
+ beta_schedule: scaled_linear
95
+ clip_sample: false
96
+ set_alpha_to_one: false
97
+ steps_offset: 1
98
+ loggers:
99
+ wandb:
100
+ enable: false
101
+ project: JiangXin
102
+ name: text-to-shape-diffusion+michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k+michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
103
+ loss:
104
+ loss_type: mse
105
+ lambda_diffusion: 1.0
106
+ optimizer:
107
+ name: AdamW
108
+ args:
109
+ lr: 5.0e-05
110
+ betas:
111
+ - 0.9
112
+ - 0.99
113
+ eps: 1.0e-06
114
+ scheduler:
115
+ name: SequentialLR
116
+ interval: step
117
+ schedulers:
118
+ - name: LinearLR
119
+ interval: step
120
+ args:
121
+ start_factor: 1.0e-06
122
+ end_factor: 1.0
123
+ total_iters: 5000
124
+ - name: CosineAnnealingLR
125
+ interval: step
126
+ args:
127
+ T_max: 5000
128
+ eta_min: 0.0
129
+ milestones:
130
+ - 5000
131
+ trainer:
132
+ num_nodes: 2
133
+ max_epochs: 100000
134
+ log_every_n_steps: 5
135
+ num_sanity_val_steps: 1
136
+ check_val_every_n_epoch: 3
137
+ enable_progress_bar: true
138
+ precision: 16-mixed
139
+ strategy: ddp_find_unused_parameters_true
140
+ checkpoint:
141
+ save_last: true
142
+ save_top_k: -1
143
+ every_n_train_steps: 5000
image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-aligned-vae/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41248dba953cad356c491e7584b4171920f2ad95af10b0f78225eda867dbb7c4
3
+ size 3722911570