# Text-to-3D
# File size: 4,045 Bytes
# a73431a
# Experiment identity and output locations.
# The path-like values below repeat `name` and `tag` verbatim, so they look
# like resolved copies of those keys; keep them in sync when editing by hand.
name: michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
description: ''
# NOTE(review): the tag contains "zeroemb0.0", while
# system.condition_model.empty_embeds_ratio in this file is 0.1 — confirm
# which setting that tag segment refers to and whether the tag is current.
tag: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
seed: 0
use_timestamp: true
timestamp: ''
exp_root_dir: outputs
# Equals exp_root_dir + "/" + name.
exp_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k
# Same string as `tag`.
trial_name: michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
# Equals exp_dir + "/" + trial_name.
trial_dir: outputs/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k/michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue
# NOTE(review): trainer.num_nodes is 2 — confirm whether n_gpus is a per-node
# or a total device count.
n_gpus: 8
# Checkpoint to load; the file name is `name` with a ".ckpt" suffix.
resume: ./ckpts/3DNativeGeneration/michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k.ckpt
# Datamodule selection (top-level `data_type`) and its settings (`data`).
# Note that `data.data_type` below is a different key from the top-level one.
data_type: objaverse-datamodule
data:
  root_dir: data/objaverse_clean/cap3d_high_quality_170k_images
  data_type: occupancy
  n_samples: 4096
  noise_sigma: 0.0

  # Supervision loading is switched off; the two supervision settings that
  # follow are still present in the config.
  load_supervision: false
  supervision_type: occupancy
  n_supervision: 10000

  # Image conditioning inputs.
  load_image: true
  image_data_path: data/objaverse_clean/raw_data/images/cap3d_high_quality_170k
  image_type: mvrgb
  # NOTE(review): five indices are listed while n_views is 4 — confirm how
  # the loader picks views from this list.
  idx: [0, 4, 8, 12, 16]
  n_views: 4

  load_caption: false
  rotate_points: false

  # Loader settings.
  batch_size: 32
  num_workers: 16
# System selection (`system_type`) and its full configuration (`system`):
# sub-model configs, noise schedules, logging, loss and optimization.
system_type: shape-diffusion-system
system:
  val_samples_json: val_data/mv_images/val_samples_rgb_mvimage.json
  z_scale_factor: 1.0
  guidance_scale: 7.5
  num_inference_steps: 50
  eta: 0.0

  # Shape autoencoder.
  # num_latents (256) and embed_dim (64) have the same values as
  # denoiser_model.n_ctx and denoiser_model.input_channels/output_channels.
  # NOTE(review): presumably these must stay equal — confirm before changing
  # one side only.
  shape_model_type: michelangelo-aligned-autoencoder
  shape_model:
    num_latents: 256
    embed_dim: 64
    point_feats: 3
    out_dim: 1
    num_freqs: 8
    include_pi: false
    heads: 12
    width: 768
    num_encoder_layers: 8
    num_decoder_layers: 16
    use_ln_post: true
    init_scale: 0.25
    qkv_bias: false
    use_flash: true
    use_checkpoint: true

  # Conditioning encoder. n_views here has the same value as data.n_views (4).
  condition_model_type: clip-embedder
  condition_model:
    pretrained_model_name_or_path: openai/clip-vit-large-patch14
    encode_camera: true
    camera_embeds_dim: 32
    n_views: 4
    empty_embeds_ratio: 0.1
    normalize_embeds: false
    zero_uncond_embeds: true

  # Denoiser network operating on the latent set.
  denoiser_model_type: simple-denoiser
  denoiser_model:
    input_channels: 64
    output_channels: 64
    n_ctx: 256
    width: 768
    layers: 6
    heads: 12
    context_dim: 1024
    init_scale: 1.0
    skip_ln: true
    use_checkpoint: true

  # Two scheduler configs: `noise_scheduler` (DDPM) and `denoise_scheduler`
  # (DDIM). Both use the same num_train_timesteps, beta_start, beta_end,
  # beta_schedule and clip_sample values.
  noise_scheduler_type: diffusers.schedulers.DDPMScheduler
  noise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: scaled_linear
    variance_type: fixed_small
    clip_sample: false
  denoise_scheduler_type: diffusers.schedulers.DDIMScheduler
  denoise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: scaled_linear
    clip_sample: false
    set_alpha_to_one: false
    steps_offset: 1

  # Experiment logging; the W&B logger is disabled here.
  # NOTE(review): the run name starts with "text-to-shape-diffusion" although
  # the experiment name is "...image-to-shape-diffusion..." — confirm intended.
  loggers:
    wandb:
      enable: false
      project: JiangXin
      name: text-to-shape-diffusion+michelangelo-image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-170k+michelangelo-aligned-autoencoder+n4096+noise0.0+pfeat3+zeroemb0.0+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue

  loss:
    loss_type: mse
    lambda_diffusion: 1.0

  optimizer:
    name: AdamW
    args:
      lr: 5.0e-05
      betas: [0.9, 0.99]
      eps: 1.0e-06

  # Learning-rate schedule, stepped per optimizer step (`interval: step`).
  # Two phases are chained: a linear ramp from start_factor to end_factor over
  # 5000 iterations, then cosine annealing; the single milestone (5000) has
  # the same value as the ramp length.
  # NOTE(review): T_max is 5000 while trainer.max_epochs is 100000 — confirm
  # the intended learning-rate behavior once the cosine phase passes T_max.
  scheduler:
    name: SequentialLR
    interval: step
    schedulers:
      - name: LinearLR
        interval: step
        args:
          start_factor: 1.0e-06
          end_factor: 1.0
          total_iters: 5000
      - name: CosineAnnealingLR
        interval: step
        args:
          T_max: 5000
          eta_min: 0.0
    milestones: [5000]
# Trainer settings.
# NOTE(review): the key names look like PyTorch Lightning Trainer arguments —
# confirm they are forwarded to it unchanged.
trainer:
  # Two nodes; the top-level n_gpus key is 8.
  num_nodes: 2
  # Very large epoch cap; no step limit is set in this block.
  max_epochs: 100000
  log_every_n_steps: 5
  num_sanity_val_steps: 1
  check_val_every_n_epoch: 3
  enable_progress_bar: true
  precision: 16-mixed
  strategy: ddp_find_unused_parameters_true
# Checkpoint-saving settings.
# NOTE(review): presumably passed to a Lightning ModelCheckpoint callback,
# where save_top_k: -1 would keep every saved checkpoint — confirm, since with
# every_n_train_steps: 5000 that means unbounded disk growth on long runs.
checkpoint:
  save_last: true
  save_top_k: -1
  every_n_train_steps: 5000