callbacks:
  rollout_lh:
    _target_: mode.rollout.libero_rollout.RolloutLibero
    _recursive_: false
    env_cfg:
      _target_: mode.wrappers.hulc_wrapper.HulcWrapper
    skip_epochs: ${rollout_lh_skip_epochs}
    benchmark_name: ${libero_benchmark}
    rollout_freq: 10
    num_videos: 0
    num_sequences: 50
    max_steps: 600
    empty_cache: false
    debug: false
    n_eval: 20
    num_procs: 10
    use_mp: false
    task_embedding_format: clip
    device: ${device}
  checkpoint:
    _target_: pytorch_lightning.callbacks.ModelCheckpoint
    save_top_k: 1
    verbose: true
    monitor: eval_lh/avg_seq_len
    mode: max
    dirpath: saved_models
    filename: '{epoch:02d}_{eval_lh/avg_seq_len:.2f}'
    every_n_epochs: ${callbacks.rollout_lh.rollout_freq}
  ema:
    _target_: mode.callbacks.ema.EMA
    decay: 0.999
    start_step: 0
    save_ema_weights_in_callback_state: true
    evaluate_ema_weights_instead: true
    power: 0.6666666666666666
    inv_gamma: 1.0
    min_value: 0.0
    max_value: 0.9999
datamodule:
  datasets:
    lang_dataset:
      _target_: mode.datasets.libero_dataset.LiberoMultitaskDataset
      key: lang
      benchmark_name: ${libero_benchmark}
      batch_size: ${batch_size}
      proprio_state: ${datamodule.proprioception_dims}
      obs_space: ${datamodule.observation_space}
      num_workers: ${num_workers}
      action_seq_len: ${act_seq_len}
      obs_seq_len: ${obs_seq_len}
      split_ratio: 0.0
  transforms:
    train:
      rgb_static:
      - _target_: torchvision.transforms.Resize
        size: 224
        antialias: true
      - _target_: mode.utils.transforms.RandomShiftsAug
        pad: 10
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      rgb_gripper:
      - _target_: torchvision.transforms.Resize
        size: 112
        antialias: true
      - _target_: mode.utils.transforms.RandomShiftsAug
        pad: 4
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
    val:
      rgb_static:
      - _target_: torchvision.transforms.Resize
        size: 224
        antialias: true
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
      rgb_gripper:
      - _target_: torchvision.transforms.Resize
        size: 112
        antialias: true
      - _target_: mode.utils.transforms.ScaleImageTensor
      - _target_: torchvision.transforms.Normalize
        mean:
        - 0.48145466
        - 0.4578275
        - 0.40821073
        std:
        - 0.26862954
        - 0.26130258
        - 0.27577711
  _target_: mode.datasets.libero_data_module.LiberoDataModule
  _recursive_: false
  root_data_dir: ${root_data_dir}
  action_space: 7
  shuffle_val: false
  benchmark_name: ${libero_benchmark}
  observation_space:
    rgb_obs:
    - agentview_rgb
    - eye_in_hand_rgb
    depth_obs: []
    state_obs:
    - gripper_states
    - joint_states
    actions:
    - rel_actions
    language:
    - language
  proprioception_dims: None
model:
  language_goal:
    _target_: mode.models.networks.clip_lang_encoder.LangClip
    _recursive_: false
    model_name: ${clip_lang_model_name}
  model:
    _target_: mode.models.edm_diffusion.score_wrappers.GCDenoiser
    _recursive_: false
    sigma_data: ${model.sigma_data}
    inner_model:
      _target_: mode.models.networks.modedit.MoDeDiT
      action_dim: ${datamodule.action_space}
      goal_dim: ${model.cond_dim}
      obs_dim: ${obs_dim}
      goal_conditioned: true
      causal: true
      use_custom_attn_mask: false
      use_proprio: ${model.use_proprio}
      state_dim: ${proprio_dims}
      embed_dim: ${model.latent_dim}
      n_layers: 12
      goal_seq_len: 1
      obs_seq_len: ${obs_seq_len}
      action_seq_len: ${act_seq_len}
      embed_pdrob: 0
      goal_drop: 0.1
      attn_pdrop: 0.3
      mlp_pdrop: 0.1
      n_heads: 8
      device: ${device}
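      # Mixture-of-experts routing options for the MoDE transformer
      # (4 experts, top-2 selection; cond_router/use_noise_token_as_input
      # condition the router on the noise token):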
      linear_output: true
      cond_router: true
      num_experts: 4
      top_k: 2
      router_normalize: true
      use_goal_in_routing: false
      use_argmax: false
      use_shared_expert: false
      use_noise_token_as_input: true
      init_style: olmoe
  _target_: mode.models.mode_agent.MoDEAgent
  _recursive_: false
  multistep: ${multistep}
  use_lr_scheduler: true
  entropy_gamma: 0.01
  router_z_delta: 0.0
  use_proprio: false
  seed: ${seed}
  sampler_type: ddim
  num_sampling_steps: 5
  sigma_data: 0.5
  sigma_min: 0.001
  sigma_max: 80
  noise_scheduler: exponential
  sigma_sample_density_type: loglogistic
  ckpt_path: /home/reuss/code/MeDiT_Policy/convert_weights/mode_first_run
  start_from_pretrained: true
  act_window_size: ${act_seq_len}
  latent_dim: 1024
  obs_enc_dim: ${obs_dim}
  cond_dim: 512
  resnet_type: '50'
optimizer:
  _target_: torch.optim.AdamW
  transformer_weight_decay: 0.05
  obs_encoder_weight_decay: 0.05
  learning_rate: 0.0001
  betas:
  - 0.9
  - 0.95
lr_scheduler:
  lr_scheduler:
    init_lr: 0.0001
    init_lr_scale: 0.1
    final_lr_scale: 1.0e-06
    total_steps: 45000
    phase_ratio: (0.02, 0.08, 0.9)
    lr: 0.0001
root_data_dir: /home/yagmurlu/code/MoDE_Calvin/dataset/task_ABC_D
lang_folder: lang_clip_resnet50
vis_clip_model_name: ViT-B/16
clip_lang_model_name: ViT-B/32
log_dir: ./logs
slurm: false
future_range: 29
seed: 242
device: cuda
batch_size: 128
devices: 2
goal_window_size: 1
act_dim: 7
proprio_dims: 9
obs_dim: 512
goal_dim: 512
obs_seq_len: 1
act_seq_len: 10
multistep: ${act_seq_len}
p_last_state: 0
gen_img_res: 112
max_epochs: 10
rollout_lh_skip_epochs: 9
num_workers: 1
benchmark_name: ${libero_benchmark}
libero_benchmark: libero_90
trainer:
  gpus: ${devices}
  precision: 32
  max_epochs: ${max_epochs}
  sync_batchnorm: false
  accelerator: auto
  limit_train_batches: 1000
  limit_val_batches: 4
logger:
  _target_: pytorch_lightning.loggers.WandbLogger
  save_dir: .
  name: logger
  group: mode
  log_model: false
  project: ${libero_benchmark}
  entity: bennoq
  id: ???
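
# --- Usage sketch (comment only, not part of the config tree) ---
# A minimal, hypothetical example of composing and instantiating this config
# with Hydra's compose API. The config_path/config_name and the override
# values are assumptions, not this repo's documented entry point:
#
#   from hydra import initialize, compose
#   from hydra.utils import instantiate
#
#   with initialize(version_base=None, config_path="conf"):
#       cfg = compose(
#           config_name="mode_libero",
#           # logger.id is ??? (mandatory), so it must be overridden before use
#           overrides=["devices=1", "logger.id=debug_run"],
#       )
#   # _recursive_: false leaves nested _target_ nodes (e.g. model.model.inner_model)
#   # as DictConfigs, so the agent instantiates its own submodules.
#   agent = instantiate(cfg.model)
#   datamodule = instantiate(cfg.datamodule)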