# NOTE: the lines originally here were file-viewer page chrome captured with
# the file, not configuration: a "Spaces" status banner reading "Runtime error"
# (twice), "File size: 5,575 Bytes", the ref "b621857", and a line-number
# gutter running 1-181.
# Experiment/run identifier.
name: "0428_clip_subsp+pk_sal_perceiver=256_01_4096_8_udt=03"
# Commented-out `wandb` settings (inactive). Shown with the nesting they
# would need if re-enabled:
# wandb:
#   project: "image_diffuser"
#   offline: false
# Training-run settings. Indentation restored so that every key below nests
# under `training` instead of sitting at the document root.
training:
  steps: 500000
  use_amp: true
  # Empty string; presumably means "no checkpoint to load" -- confirm against
  # the code that reads this key.
  ckpt_path: ""
  # Keep the `1.e-4` spelling: with the dot, YAML 1.1 loaders such as PyYAML
  # resolve it as a float, whereas a bare `1e-4` is loaded as a string.
  base_lr: 1.e-4
  gradient_clip_val: 5.0
  gradient_clip_algorithm: "norm"
  every_n_train_steps: 5000
  val_check_interval: 1024
  limit_val_batches: 16
# Dataset configuration: `target` is a dotted class path and `params` holds
# its settings. Indentation restored; in the flattened form `target`,
# `params`, `render_folder` and `tar_folder` were duplicate keys at one level.
dataset:
  target: michelangelo.data.asl_torch_dataset.MultiAlignedShapeImageTextModule
  params:
    batch_size: 38
    num_workers: 4
    val_num_workers: 4
    buffer_size: 256
    return_normal: true
    random_crop: false
    surface_sampling: true
    # The anchors below (&pc_size, &mean, &std) are defined here but not
    # aliased anywhere in the visible document; kept in case a consumer or a
    # later edit relies on them.
    pc_size: &pc_size 4096
    image_size: 384
    mean: &mean [0.5, 0.5, 0.5]
    std: &std [0.5, 0.5, 0.5]

    cond_stage_key: "text"

    # One entry per data source; each gives a `render_folder` and a
    # `tar_folder`. All paths are absolute and machine-specific.
    # NOTE(review): nesting of `meta_info` under `params` is reconstructed
    # from the flattened source -- confirm against the dataset module.
    meta_info:
      3D-FUTURE:
        render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"
      ABO:
        render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/ABO"
      GSO:
        render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/GSO"
      TOYS4K:
        render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"
      3DCaricShop:
        render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"
      Thingi10K:
        render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"
      shapenet:
        render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
        tar_folder: "/root/workspace/datasets/make_tars/shapenet"
      pokemon:
        render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
        tar_folder: "/root/workspace/datasets/make_tars/pokemon"
      objaverse:
        render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
        tar_folder: "/root/workspace/datasets/make_tars/objaverse"
# Model configuration. Each component is described by a `target` (dotted
# class path) and, where present, a `params` mapping. Indentation restored:
# in the flattened form `target`/`params` repeated at a single level and the
# two `loss_cfg` entries collided.
# NOTE(review): the nesting is reconstructed from key repetition and the
# target/params pattern -- confirm against the consuming module.
model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:
        # ckpt_path: "/root/workspace/cq_workspace/michelangelo/experiments/aligned_shape_latents/clip_aslperceiver_sp+pk_01_01/ckpt/ckpt-step=00230000.ckpt"
        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            # Anchored so the denoiser below reuses the same values.
            num_latents: &num_latents 256
            embed_dim: &embed_dim 64
            point_feats: 3  # normal
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: true
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"

        # First-stage loss slot, set to torch.nn.Identity. Distinct from the
        # diffuser-level `loss_cfg` further down.
        loss_cfg:
          target: torch.nn.Identity

    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
      params:
        # Same CLIP path as `clip_model_version` above.
        version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
        # NOTE(review): "radio" may be a misspelling of "ratio"; the key is
        # left as-is because the consumer reads it by this exact name.
        zero_embedding_radio: 0.1
        max_length: 77

    first_stage_key: "surface"
    cond_stage_key: "text"
    scale_by_std: false

    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        # Aliases tie the denoiser to the shape module's latent settings.
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        # NOTE(review): the trailing comment computes with 6 while the value
        # is 8 -- possibly stale; confirm.
        layers: 8  # 2 * 6 + 1 = 13
        heads: 12
        context_dim: 768
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0

      # `noise` and `denoise` share the same beta settings
      # (beta_start / beta_end / beta_schedule / num_train_timesteps).
      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false  # clip sample to -1~1
          set_alpha_to_one: false
          steps_offset: 1

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          # Keep the `1.e-6` / `1.e-2` spelling: with the dot, YAML 1.1
          # loaders such as PyYAML resolve these as floats; a bare `1e-6`
          # would be loaded as a string.
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"
# Logger/callback configuration: `target` is a dotted class path and `params`
# holds its settings. Indentation restored so `target` and `params` nest
# under `logger`; the stray trailing `|` (viewer residue, not YAML content)
# that followed this section has been dropped.
logger:
  target: michelangelo.utils.trainings.mesh_log_callback.TextConditionalASLDiffuserLogger
  params:
    step_frequency: 1000
    num_samples: 4
    sample_times: 4
    # Six values; presumably an axis-aligned box as
    # [x_min, y_min, z_min, x_max, y_max, z_max] -- confirm with the logger.
    bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
    octree_depth: 7
    num_chunks: 10000