name: "0428_clip_subsp+pk_sal_perceiver=256_01_4096_8_udt=03"
#wandb:
#  project: "image_diffuser"
#  offline: false
training:
  steps: 500000
  use_amp: true
  ckpt_path: ""
  base_lr: 1.e-4
  gradient_clip_val: 5.0
  gradient_clip_algorithm: "norm"
  every_n_train_steps: 5000
  val_check_interval: 1024
  limit_val_batches: 16
  # dataset
  dataset:
    target: michelangelo.data.asl_torch_dataset.MultiAlignedShapeImageTextModule
    params:
      batch_size: 38
      num_workers: 4
      val_num_workers: 4
      buffer_size: 256
      return_normal: true
      random_crop: false
      surface_sampling: true
      pc_size: &pc_size 4096
      image_size: 384
      mean: &mean [0.5, 0.5, 0.5]
      std: &std [0.5, 0.5, 0.5]
      cond_stage_key: "text"
      meta_info:
        3D-FUTURE:
          render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
          tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"
        ABO:
          render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
          tar_folder: "/root/workspace/datasets/make_tars/ABO"
        GSO:
          render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
          tar_folder: "/root/workspace/datasets/make_tars/GSO"
        TOYS4K:
          render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
          tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"
        3DCaricShop:
          render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
          tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"
        Thingi10K:
          render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
          tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"
        shapenet:
          render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
          tar_folder: "/root/workspace/datasets/make_tars/shapenet"
        pokemon:
          render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
          tar_folder: "/root/workspace/datasets/make_tars/pokemon"
        objaverse:
          render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
          tar_folder: "/root/workspace/datasets/make_tars/objaverse"
model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:
        # ckpt_path: "/root/workspace/cq_workspace/michelangelo/experiments/aligned_shape_latents/clip_aslperceiver_sp+pk_01_01/ckpt/ckpt-step=00230000.ckpt"
        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            num_latents: &num_latents 256
            embed_dim: &embed_dim 64
            point_feats: 3   # normal
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: true
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"

        loss_cfg:
          target: torch.nn.Identity

    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
      params:
        version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
        zero_embedding_radio: 0.1
        max_length: 77

    first_stage_key: "surface"
    cond_stage_key: "text"
    scale_by_std: false
    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        # NOTE(review): the *embed_dim / *num_latents aliases were lost in extraction;
        # restored from the &embed_dim / &num_latents anchors declared above — confirm against upstream config.
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        layers: 8   # 2 * 6 + 1 = 13
        heads: 12
        context_dim: 768
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0
      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false   # clip sample to -1~1
          set_alpha_to_one: false
          steps_offset: 1
    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"
logger:
  target: michelangelo.utils.trainings.mesh_log_callback.TextConditionalASLDiffuserLogger
  params:
    step_frequency: 1000
    num_samples: 4
    sample_times: 4
    bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
    octree_depth: 7
    num_chunks: 10000