exp_root_dir: "outputs"
name: "image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6"
tag: "${rmspace:${system.shape_model_type}+n${data.n_samples}+noise${data.noise_sigma}+pfeat${system.shape_model.point_feats}+normemb${system.condition_model.normalize_embeds}+lr${system.optimizer.args.lr}+qkvbias${system.shape_model.qkv_bias}+nfreq${system.shape_model.num_freqs}+ln_post${system.shape_model.use_ln_post},_}"
seed: 0
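# The run directory is assembled from the fields above (by this codebase's convention,
# ${exp_root_dir}/${name}/${tag} -- the same layout as the pretrained checkpoint path
# referenced further down). The tag string is expanded from the listed hyperparameters
# by the custom `rmspace` resolver, which apparently replaces spaces (here with "_")
# so the result is a valid folder name.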
data_type: "objaverse-datamodule"
data:
  root_dir: "data/objaverse_clean/cap3d_high_quality_170k_images"
  data_type: "occupancy"
  n_samples: 4096
  noise_sigma: 0.
  load_supervision: False
  supervision_type: "occupancy"
  n_supervision: 4096

  load_image: True                # whether to load images
  image_data_path: data/objaverse_clean/raw_data/images/cap3d_high_quality_170k
  image_type: "mvrgb"             # rgb, normal, mvrgb, mvnormal
  idx: [0, 4, 8, 12, 16]
  n_views: 4
  load_caption: False             # whether to load captions
  rotate_points: False

  batch_size: 32
  num_workers: 16
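  # The objaverse-datamodule samples 4096 surface points per shape (explicit
  # occupancy supervision is disabled for this diffusion stage) and loads
  # multi-view RGB renders ("mvrgb"); `idx` lists candidate view indices from
  # which, presumably, `n_views` views are used per sample. The paths under
  # data/objaverse_clean refer to the authors' preprocessed dataset.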
system_type: "shape-diffusion-system"
system:
  val_samples_json: "val_data/mv_images/val_samples_rgb_mvimage.json"
  z_scale_factor: 1.0
  guidance_scale: 7.5
  num_inference_steps: 50
  eta: 0.0
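  # Sampling settings used at validation/inference time: classifier-free
  # guidance at scale 7.5 over 50 denoising steps; eta: 0.0 makes the DDIM
  # sampler configured below deterministic.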
  shape_model_type: "michelangelo-autoencoder"
  shape_model:
    # pretrained_model_name_or_path: ./ckpts/3DNativeGeneration/michelangelo-aligned-autoencoder-l256-e64-ne8-nd16.ckpt
    pretrained_model_name_or_path: "./outputs/image-to-shape-diffusion_bak/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/michelangelo-autoencoder+n4096+noise0.0+pfeat3+normembFalse+lr5e-05+qkvbiasFalse+nfreq8+ln_postTrue/ckpts/last.ckpt"
    num_latents: 256
    embed_dim: 64
    point_feats: 3                # per-point features in addition to xyz (normals)
    out_dim: 1                    # occupancy only
    num_freqs: 8
    include_pi: false
    heads: 12
    width: 768
    num_encoder_layers: 8
    num_decoder_layers: 16
    use_ln_post: true
    init_scale: 0.25
    qkv_bias: false
    use_flash: true
    use_checkpoint: true
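    # The Michelangelo-style autoencoder compresses each point cloud (xyz plus
    # normals, Fourier-embedded with 8 frequencies) into 256 latent tokens of
    # 64 channels; the denoiser below diffuses in this latent space, which is
    # why its input/output_channels and n_ctx reference embed_dim and
    # num_latents. The pretrained checkpoint path points at a previous training
    # run under ./outputs and will need to be adapted to your own setup.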
  condition_model_type: "clip-embedder"
  condition_model:
    pretrained_model_name_or_path: "./ckpts/pretrained_weights/huggingface/hub/models--openai--clip-vit-large-patch14/snapshots/8d052a0f05efbaefbc9e8786ba291cfdf93e5bff"
    encode_camera: true
    camera_embeds_dim: 32         # 16 * 2 (sin, cos)
    n_views: ${data.n_views}
    empty_embeds_ratio: 0.1
    normalize_embeds: false
    # zero_uncond_embeds: true
    zero_uncond_embeds: false
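    # Image conditioning uses the CLIP ViT-L/14 vision encoder plus a sinusoidal
    # camera embedding (32 = 16 frequency bands * 2 for sin/cos, per the comment
    # above); empty_embeds_ratio: 0.1 drops the condition for ~10% of training
    # samples to enable classifier-free guidance. A minimal sketch of such an
    # embedding, purely illustrative and not this repository's implementation:
    #
    #   import torch
    #   def embed_camera(cam: torch.Tensor, n_freqs: int = 16) -> torch.Tensor:
    #       freqs = 2.0 ** torch.arange(n_freqs, dtype=cam.dtype)  # frequency bands (assumed)
    #       x = cam[..., None] * freqs                             # (..., D, n_freqs)
    #       return torch.cat([x.sin(), x.cos()], dim=-1)           # last dim: 2 * n_freqs = 32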
  denoiser_model_type: "simple-denoiser"
  denoiser_model:
    # pretrained_model_name_or_path: "./ckpts/CraftsMan/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6.pth"
    pretrained_model_name_or_path: "./ckpts/CraftsMan/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-It500000.pth"
    input_channels: ${system.shape_model.embed_dim}
    output_channels: ${system.shape_model.embed_dim}
    n_ctx: ${system.shape_model.num_latents}
    width: 768
    layers: 6                     # 2 * 6 + 1 = 13
    heads: 12
    context_dim: 1024
    init_scale: 1.0
    skip_ln: true
    use_checkpoint: true
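    # The denoiser is a transformer over the 256 latent tokens that attends to
    # the CLIP image features (context_dim: 1024 matches the ViT-L/14 hidden
    # size). The "2 * 6 + 1 = 13" note suggests a U-Net-like layout: 6 down
    # blocks, 6 up blocks joined by (layer-normed) skips, and one middle block.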
  noise_scheduler_type: "diffusers.schedulers.DDPMScheduler"
  noise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    variance_type: "fixed_small"
    clip_sample: false

  denoise_scheduler_type: "diffusers.schedulers.DDIMScheduler"
  denoise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    clip_sample: false            # clip samples to [-1, 1]
    set_alpha_to_one: false
    steps_offset: 1
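  # Training adds noise with a DDPM scheduler; sampling uses DDIM with the same
  # betas. Since the *_type fields name diffusers classes, the keys above map
  # onto the constructor arguments, roughly like this (a sketch, assuming the
  # system forwards them unchanged):
  #
  #   from diffusers.schedulers import DDPMScheduler, DDIMScheduler
  #   train_sched  = DDPMScheduler(num_train_timesteps=1000, beta_start=0.00085,
  #                                beta_end=0.012, beta_schedule="scaled_linear",
  #                                variance_type="fixed_small", clip_sample=False)
  #   sample_sched = DDIMScheduler(num_train_timesteps=1000, beta_start=0.00085,
  #                                beta_end=0.012, beta_schedule="scaled_linear",
  #                                clip_sample=False, set_alpha_to_one=False,
  #                                steps_offset=1)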
  loggers:
    wandb:
      enable: false
      project: "CraftsMan"
      name: image-to-shape-diffusion+${name}+${tag}

  loss:
    loss_type: "mse"
    lambda_diffusion: 1.

  optimizer:
    name: AdamW
    args:
      lr: 5.e-5
      betas: [0.9, 0.99]
      eps: 1.e-6

  scheduler:
    name: SequentialLR
    interval: step
    schedulers:
      - name: LinearLR
        interval: step
        args:
          start_factor: 1e-6
          end_factor: 1.0
          total_iters: 5000
      - name: CosineAnnealingLR
        interval: step
        args:
          T_max: 5000
          eta_min: 0.
    milestones: [5000]
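  # Optimization: AdamW at lr 5e-5 with a 5000-step linear warmup (from lr * 1e-6
  # up to the full lr) followed by a 5000-step cosine decay to 0, switched at the
  # milestone. In plain PyTorch the equivalent schedule looks roughly like this
  # (a sketch; the training system builds these objects from the config):
  #
  #   from torch.optim import AdamW
  #   from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingLR
  #   opt    = AdamW(params, lr=5.0e-5, betas=(0.9, 0.99), eps=1.0e-6)
  #   warmup = LinearLR(opt, start_factor=1e-6, end_factor=1.0, total_iters=5000)
  #   cosine = CosineAnnealingLR(opt, T_max=5000, eta_min=0.0)
  #   sched  = SequentialLR(opt, schedulers=[warmup, cosine], milestones=[5000])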
trainer:
  num_nodes: 1
  max_epochs: 100000
  log_every_n_steps: 5
  num_sanity_val_steps: 1
  check_val_every_n_epoch: 3
  enable_progress_bar: true
  precision: 16-mixed
  strategy: 'ddp_find_unused_parameters_true'
checkpoint:
  save_last: true
  save_top_k: -1
  every_n_train_steps: 5000
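# Training runs with 16-bit mixed precision under DDP (find_unused_parameters
# enabled), validates every 3 epochs, and writes a checkpoint every 5000 steps,
# keeping all of them (save_top_k: -1) plus a rolling "last" checkpoint.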