#cwd: ${hydra:runtime.output_dir}
name: adroit
log_frequency: 100
load_path: null
resume_name: null
auto_resume: false
num_node: 1
node_rank: null
dist_usrl: null
gpu: 0
sync_bn: false
tensorboard: false
timestamp: false
seed: null
cudnn_deterministic: false
amp: false
debug: false
opts: null
solver:
  base_lr: 3.0e-06
  adjust_lr: none
  max_epochs: 100
  save_epochs: 20
  validation_epochs: 1
  sample_iterations: 400
  print_specific_things: true
  ema:
    decay: 0.99
    update_interval: 25
    device: cpu
  clip_grad_norm:
    target: diffusion_reward.models.video_models.vqdiffusion.engine.clip_grad_norm.ClipGradNorm
    params:
      start_iteration: 0
      end_iteration: 5000
      max_norm: 0.5
  optimizers_and_schedulers:
  - name: none
    optimizer:
      target: torch.optim.AdamW
      params:
        betas:
        - 0.9
        - 0.96
        weight_decay: 0.045
    scheduler:
      step_iteration: 1
      target: diffusion_reward.models.video_models.vqdiffusion.engine.lr_scheduler.ReduceLROnPlateauWithWarmup
      params:
        factor: 0.5
        patience: 100000
        min_lr: 1.0e-06
        threshold: 0.1
        threshold_mode: rel
        warmup_lr: 0.00045
        warmup: 5000
save_dir: ???
dist_url: ???
ngpus_per_node: ???
world_size: ???
local_rank: ???
global_rank: ???
distributed: false
model:
  target: diffusion_reward.models.video_models.vqdiffusion.modeling.models.frame_conditional_dalle.FC_DALLE
  params:
    content_info:
      key: image
    condition_info:
      key: frame
    frame_skip: 1
    content_codec_config:
      target: diffusion_reward.models.video_models.vqdiffusion.modeling.codecs.image_codec.vqgan.MiniVQGAN
      params:
        args:
          latent_dim: 64
          device: cuda
          image_channels: 3
          num_codebook_vectors: 1024
          beta: 0.25
          channels:
          - 128
          - 128
          - 256
          - 256
          resolution: 64
          latent_size: 8
        trainable: false
        token_shape:
        - 8
        - 8
        ckpt_path: /exp_local/codec_models/vqgan/adroit/results/checkpoints/vqgan.pt
    diffusion_config:
      target: diffusion_reward.models.video_models.vqdiffusion.modeling.transformers.diffusion_transformer.DiffusionTransformer
      params:
        diffusion_step: 100
        alpha_init_type: alpha1
        auxiliary_loss_weight: 0.001
        adaptive_auxiliary_loss: true
        mask_weight:
        - 1
        - 1
        transformer_config:
          target: diffusion_reward.models.video_models.vqdiffusion.modeling.transformers.transformer_utils.Text2ImageTransformer
          params:
            diffusion_step: ???
            content_emb_config: ???
            attn_type: selfcross
            n_layer: 16
            condition_seq_len: 128
            content_seq_len: 64
            content_spatial_size:
            - 8
            - 8
            n_embd: 128
            condition_dim: 1024
            n_head: 16
            attn_pdrop: 0.0
            resid_pdrop: 0.0
            block_activate: GELU2
            timestep_type: adalayernorm
            mlp_hidden_times: 2
            mlp_type: conv_mlp
        condition_emb_config:
          target: diffusion_reward.models.video_models.vqdiffusion.modeling.embeddings.frame_embedding.FrameEmbedding
          params:
            num_embed: 1024
            embed_dim: 1024
            identity: false
            trainable: true
            num_cond_frames: 2
        content_emb_config:
          target: diffusion_reward.models.video_models.vqdiffusion.modeling.embeddings.dalle_mask_image_embedding.DalleMaskImageEmbedding
          params:
            num_embed: 1024
            spatial_size:
            - 8
            - 8
            embed_dim: 128
            trainable: true
            pos_emb_type: embedding
dataloader:
  data_root: /video_dataset/adroit/
  batch_size: 4
  num_workers: 4
  train_datasets:
  - target: diffusion_reward.models.video_models.vqdiffusion.data.dataset.VideoDataset
    params:
      data_root: ${dataloader.data_root}
      phase: train
      frame_skip: ${model.params.frame_skip}
      frames_per_sample: 3
      size: 64
  validation_datasets:
  - target: diffusion_reward.models.video_models.vqdiffusion.data.dataset.VideoDataset
    params:
      data_root: ${dataloader.data_root}
      phase: test
      frame_skip: ${model.params.frame_skip}
      frames_per_sample: 3
      size: 64
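
# Notes on the markers used above:
# - `???` is OmegaConf's mandatory-missing marker: `save_dir` and the
#   distributed-training fields must be supplied at launch time, and the
#   transformer's `diffusion_step` / `content_emb_config` are presumably
#   injected by the DiffusionTransformer from its sibling fields before the
#   transformer is constructed.
# - `${...}` entries are OmegaConf interpolations resolved against this same
#   file, e.g. `${dataloader.data_root}` and `${model.params.frame_skip}`.
#
# A minimal loading sketch, assuming OmegaConf is installed and this file is
# saved as `adroit.yaml`. `build_from_config` is an illustrative stand-in for
# whatever `target`/`params` builder the codebase actually ships, not its
# real API:
#
#   import importlib
#   from omegaconf import OmegaConf
#
#   def build_from_config(node):
#       # Import the class named by `target` and call it with `params` as kwargs.
#       module_path, name = node["target"].rsplit(".", 1)
#       cls = getattr(importlib.import_module(module_path), name)
#       return cls(**node.get("params", {}))
#
#   cfg = OmegaConf.load("adroit.yaml")
#   print(cfg.solver.base_lr)              # 3e-06
#   model = build_from_config(cfg.model)   # FC_DALLE; needs diffusion_reward installed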