# Experiment configuration for the run logged as `conditional_pose_diffusion`
# (see WANDB.name below).
#
# NOTE(review): `${base_dirs.*}` interpolations refer to a `base_dirs` node
# that is not defined in this file; presumably it is supplied by another
# config that gets merged in before resolution -- confirm.

random_seed: 1

WANDB:
  project: StructDiffusion
  save_dir: ${base_dirs.wandb_dir}
  name: conditional_pose_diffusion

DATASET:
  data_root: ${base_dirs.data}
  vocab_dir: ${base_dirs.data}/type_vocabs_coarse.json

  # important
  # (MODEL.ignore_rgb and MODEL.use_virtual_structure_frame interpolate the
  # values set here, so they only need to be changed in this section)
  use_virtual_structure_frame: true
  ignore_distractor_objects: true
  ignore_rgb: true

  # the following are determined by the dataset
  max_num_target_objects: 7
  max_num_distractor_objects: 5
  max_num_shape_parameters: 5

  # set to zeros because they are not used for now
  max_num_rearrange_features: 0
  max_num_anchor_features: 0

  num_pts: 1024
  # explicitly null (the original left this value empty)
  filter_num_moved_objects_range: null
  data_augmentation: false

DATALOADER:
  batch_size: 64
  num_workers: 8
  pin_memory: true

MODEL:
  # transformer encoder
  encoder_input_dim: 256
  num_attention_heads: 8
  encoder_hidden_dim: 512
  encoder_dropout: 0.0
  encoder_activation: relu
  encoder_num_layers: 8

  # output head
  structure_dropout: 0
  object_dropout: 0

  # pc encoder
  ignore_rgb: ${DATASET.ignore_rgb}
  pc_emb_dim: 256
  posed_pc_emb_dim: 80

  # pose encoder
  pose_emb_dim: 80

  # language
  word_emb_dim: 160

  # diffusion step
  time_emb_dim: 80

  # sequence embeddings
  # max_seq_size should equal max_num_target_objects
  # (+ max_num_distractor_objects if not ignore_distractor_objects)
  max_seq_size: 7
  max_token_type_size: 4
  seq_pos_emb_dim: 8
  seq_type_emb_dim: 8

  # virtual frame
  use_virtual_structure_frame: ${DATASET.use_virtual_structure_frame}

NOISE_SCHEDULE:
  timesteps: 200

LOSS:
  type: huber

OPTIMIZER:
  lr: 0.0001
  weight_decay: 0  # 0.0001
  # lr_restart: 3000
  # warmup: 10

TRAINER:
  max_epochs: 200
  gradient_clip_val: 1.0
  gpus: 1
  deterministic: false
  # enable_progress_bar: False