gubam committed 13 days ago

Commit

a158b2c

verified ·

1 Parent(s): 456ce15

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

config.json +70 -0
embodiment_id.json +11 -0
experiment_cfg/conf.yaml +226 -0
experiment_cfg/config.yaml +260 -0
experiment_cfg/dataset_statistics.json +0 -0
experiment_cfg/final_model_config.json +54 -0
experiment_cfg/final_processor_config.json +0 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
optimizer.pt +3 -0
processor_config.json +472 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
statistics.json +0 -0
trainer_state.json +0 -0
training_args.bin +3 -0
wandb_config.json +1 -0

config.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "action_horizon": 50,
+  "add_pos_embed": true,
+  "apply_sincos_state_encoding": true,
+  "architectures": [
+    "Gr00tN1d6"
+  ],
+  "attn_dropout": 0.2,
+  "attn_implementation": null,
+  "backbone_embedding_dim": 2048,
+  "backbone_model_type": "eagle",
+  "backbone_trainable_params_fp32": true,
+  "collator_overwrite_image_inputs": false,
+  "color_jitter_params": {
+    "brightness": 0.1,
+    "contrast": 0.1,
+    "hue": 0.1,
+    "saturation": 0.1
+  },
+  "crop_fraction": 0.95,
+  "diffusion_model_cfg": {
+    "attention_head_dim": 48,
+    "dropout": 0.2,
+    "final_dropout": true,
+    "interleave_self_attention": true,
+    "norm_type": "ada_norm",
+    "num_attention_heads": 32,
+    "num_layers": 32,
+    "output_dim": 1024,
+    "positional_embeddings": null
+  },
+  "eagle_collator": true,
+  "formalize_language": true,
+  "gemma_collator": false,
+  "hidden_size": 1024,
+  "image_crop_size": null,
+  "image_target_size": null,
+  "input_embedding_dim": 1536,
+  "load_bf16": true,
+  "max_action_dim": 128,
+  "max_num_embodiments": 32,
+  "max_seq_len": 1024,
+  "max_state_dim": 128,
+  "model_dtype": "bfloat16",
+  "model_name": "nvidia/Eagle-Block2A-2B-v2",
+  "model_type": "Gr00tN1d6",
+  "noise_beta_alpha": 1.5,
+  "noise_beta_beta": 1.0,
+  "noise_s": 0.999,
+  "num_inference_timesteps": 4,
+  "num_timestep_buckets": 1000,
+  "random_rotation_angle": null,
+  "reproject_vision": false,
+  "select_layer": 16,
+  "shortest_image_edge": 256,
+  "state_dropout_prob": 0.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "tune_diffusion_model": true,
+  "tune_llm": false,
+  "tune_projector": true,
+  "tune_top_llm_layers": 4,
+  "tune_visual": false,
+  "tune_vlln": true,
+  "use_albumentations_transforms": true,
+  "use_alternate_vl_dit": true,
+  "use_flash_attention": true,
+  "use_relative_action": true,
+  "use_vlln": true
+}

embodiment_id.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "robocasa_panda_omron": 13,
+  "gr1": 20,
+  "behavior_r1_pro": 24,
+  "unitree_g1": 8,
+  "oxe_google": 0,
+  "oxe_widowx": 1,
+  "libero_panda": 2,
+  "oxe_droid": 16,
+  "new_embodiment": 10
+}

experiment_cfg/conf.yaml ADDED Viewed

	@@ -0,0 +1,226 @@

+load_config_path: null
+model:
+  model_type: Gr00tN1d6
+  model_dtype: bfloat16
+  model_name: nvidia/Eagle-Block2A-2B-v2
+  backbone_model_type: eagle
+  model_revision: null
+  tune_top_llm_layers: 4
+  backbone_embedding_dim: 2048
+  tune_llm: false
+  tune_visual: false
+  select_layer: 16
+  reproject_vision: false
+  use_flash_attention: true
+  load_bf16: false
+  collator_overwrite_image_inputs: false
+  eagle_collator: true
+  backbone_trainable_params_fp32: true
+  image_crop_size: null
+  image_target_size: null
+  shortest_image_edge: 256
+  crop_fraction: 0.95
+  random_rotation_angle: null
+  color_jitter_params:
+    brightness: 0.3
+    contrast: 0.4
+    saturation: 0.5
+    hue: 0.08
+  use_albumentations_transforms: true
+  extra_augmentation_config: null
+  formalize_language: true
+  apply_sincos_state_encoding: false
+  use_relative_action: true
+  max_state_dim: 29
+  max_action_dim: 29
+  action_horizon: 16
+  hidden_size: 1024
+  input_embedding_dim: 1536
+  add_pos_embed: true
+  attn_dropout: 0.2
+  use_vlln: true
+  max_seq_len: 1024
+  use_alternate_vl_dit: true
+  attend_text_every_n_blocks: 2
+  diffusion_model_cfg:
+    positional_embeddings: null
+    num_layers: 32
+    num_attention_heads: 32
+    attention_head_dim: 48
+    norm_type: ada_norm
+    dropout: 0.2
+    final_dropout: true
+    output_dim: 1024
+    interleave_self_attention: true
+  num_inference_timesteps: 4
+  noise_beta_alpha: 1.5
+  noise_beta_beta: 1.0
+  noise_s: 0.999
+  num_timestep_buckets: 1000
+  tune_projector: true
+  tune_diffusion_model: true
+  tune_vlln: true
+  state_dropout_prob: 0.0
+  state_additive_noise_scale: 0.0
+  max_num_embodiments: 32
+data:
+  datasets:
+  - dataset_paths:
+    - /content/Isaac-GR00T/gubam/upper_body_dataset
+    embodiment_tag: new_embodiment
+    mix_ratio: 1.0
+    dataset_type: physical_embodiment
+    val_dataset_path: null
+  modality_configs:
+    new_embodiment:
+      video:
+        delta_indices:
+        - 0
+        modality_keys:
+        - front
+        - left_wrist
+        - right_wrist
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs: null
+      state:
+        delta_indices:
+        - 0
+        modality_keys:
+        - arm
+        - gripper
+        sin_cos_embedding_keys:
+        - arm
+        mean_std_embedding_keys: null
+        action_configs: null
+      action:
+        delta_indices:
+        - 0
+        - 1
+        - 2
+        - 3
+        - 4
+        - 5
+        - 6
+        - 7
+        - 8
+        - 9
+        - 10
+        - 11
+        - 12
+        - 13
+        - 14
+        - 15
+        - 16
+        - 17
+        - 18
+        - 19
+        - 20
+        - 21
+        - 22
+        - 23
+        - 24
+        - 25
+        - 26
+        - 27
+        - 28
+        - 29
+        modality_keys:
+        - arm
+        - gripper
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs:
+        - rep: RELATIVE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+        - rep: ABSOLUTE
+          type: NON_EEF
+          format: DEFAULT
+          state_key: null
+      language:
+        delta_indices:
+        - 0
+        modality_keys:
+        - annotation.task_description
+        sin_cos_embedding_keys: null
+        mean_std_embedding_keys: null
+        action_configs: null
+  download_cache: false
+  shard_size: 1024
+  episode_sampling_rate: 0.1
+  num_shards_per_epoch: 100000
+  override_pretraining_statistics: false
+  mode: single_turn
+  random_chop: 0.0
+  mock_dataset_mode: false
+  shuffle: true
+  seed: 42
+  multiprocessing_context: fork
+  allow_padding: false
+  subsample_ratio: 1.0
+  image_crop_size:
+  - 244
+  - 244
+  image_target_size:
+  - 224
+  - 224
+  video_backend: torchcodec
+training:
+  output_dir: /tmp/dual_arm_finetune
+  experiment_name: null
+  max_steps: 30000
+  global_batch_size: 8
+  batch_size: null
+  gradient_accumulation_steps: 1
+  learning_rate: 0.0001
+  lr_scheduler_type: cosine
+  weight_decay: 1.0e-05
+  warmup_ratio: 0.05
+  warmup_steps: 0
+  max_grad_norm: 1.0
+  optim: adamw_torch
+  start_from_checkpoint: nvidia/GR00T-N1.6-3B
+  tf32: true
+  fp16: false
+  bf16: true
+  eval_bf16: true
+  logging_steps: 10
+  save_steps: 10000
+  save_total_limit: 5
+  save_vl_model: false
+  upload_checkpoints: false
+  upload_every: 1000
+  upload_last_n_checkpoints: 5
+  max_concurrent_uploads: 2
+  eval_strategy: 'no'
+  eval_steps: 500
+  eval_set_split_ratio: 0.1
+  eval_batch_size: 2
+  save_best_eval_metric_name: ''
+  save_best_eval_metric_greater_is_better: true
+  deepspeed_stage: 2
+  gradient_checkpointing: false
+  transformers_trust_remote_code: true
+  transformers_local_files_only: false
+  transformers_cache_dir: null
+  transformers_access_token: null
+  use_ddp: false
+  ddp_bucket_cap_mb: 100
+  num_gpus: 1
+  dataloader_num_workers: 4
+  remove_unused_columns: false
+  use_wandb: true
+  wandb_project: finetune-gr00t-n1d6
+  enable_profiling: false
+  max_retries: 3
+  assert_loss_less_than: null
+  add_rl_callback: false
+  enable_open_loop_eval: false
+  open_loop_eval_traj_ids:
+  - 0
+  open_loop_eval_steps_per_traj: 100
+  open_loop_eval_plot_indices: null
+max_steps: 30000
+save_steps: 10000

experiment_cfg/config.yaml ADDED Viewed

	@@ -0,0 +1,260 @@

+!!python/object:gr00t.configs.base_config.Config
+data: !!python/object:gr00t.configs.data.data_config.DataConfig
+  allow_padding: false
+  datasets:
+  - !!python/object:gr00t.configs.data.data_config.SingleDatasetConfig
+    dataset_paths:
+    - /content/Isaac-GR00T/gubam/upper_body_dataset
+    dataset_type: physical_embodiment
+    embodiment_tag: new_embodiment
+    mix_ratio: 1.0
+    val_dataset_path: null
+  download_cache: false
+  episode_sampling_rate: 0.1
+  image_crop_size:
+  - 244
+  - 244
+  image_target_size:
+  - 224
+  - 224
+  mock_dataset_mode: false
+  modality_configs:
+    new_embodiment:
+      action: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs:
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: &id001 !!python/object/apply:gr00t.data.types.ActionFormat
+          - default
+          rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
+          - relative
+          state_key: null
+          type: &id002 !!python/object/apply:gr00t.data.types.ActionType
+          - non_eef
+        - !!python/object:gr00t.data.types.ActionConfig
+          format: *id001
+          rep: !!python/object/apply:gr00t.data.types.ActionRepresentation
+          - absolute
+          state_key: null
+          type: *id002
+        delta_indices:
+        - 0
+        - 1
+        - 2
+        - 3
+        - 4
+        - 5
+        - 6
+        - 7
+        - 8
+        - 9
+        - 10
+        - 11
+        - 12
+        - 13
+        - 14
+        - 15
+        - 16
+        - 17
+        - 18
+        - 19
+        - 20
+        - 21
+        - 22
+        - 23
+        - 24
+        - 25
+        - 26
+        - 27
+        - 28
+        - 29
+        mean_std_embedding_keys: null
+        modality_keys:
+        - arm
+        - gripper
+        sin_cos_embedding_keys: null
+      language: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs: null
+        delta_indices:
+        - 0
+        mean_std_embedding_keys: null
+        modality_keys:
+        - annotation.task_description
+        sin_cos_embedding_keys: null
+      state: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs: null
+        delta_indices:
+        - 0
+        mean_std_embedding_keys: null
+        modality_keys:
+        - arm
+        - gripper
+        sin_cos_embedding_keys:
+        - arm
+      video: !!python/object:gr00t.data.types.ModalityConfig
+        action_configs: null
+        delta_indices:
+        - 0
+        mean_std_embedding_keys: null
+        modality_keys:
+        - front
+        - left_wrist
+        - right_wrist
+        sin_cos_embedding_keys: null
+  mode: single_turn
+  multiprocessing_context: fork
+  num_shards_per_epoch: 100000
+  override_pretraining_statistics: false
+  random_chop: 0.0
+  seed: 42
+  shard_size: 1024
+  shuffle: true
+  subsample_ratio: 1.0
+  video_backend: torchcodec
+load_config_path: null
+model: !!python/object:gr00t.configs.model.gr00t_n1d6.Gr00tN1d6Config
+  _attn_implementation_autoset: false
+  _attn_implementation_internal: null
+  _commit_hash: null
+  _name_or_path: ''
+  add_cross_attention: false
+  architectures: null
+  backbone_model_type: eagle
+  backbone_trainable_params_fp32: true
+  bad_words_ids: null
+  begin_suppress_tokens: null
+  bos_token_id: null
+  chunk_size_feed_forward: 0
+  color_jitter_params:
+    brightness: 0.3
+    contrast: 0.4
+    hue: 0.08
+    saturation: 0.5
+  cross_attention_hidden_size: null
+  decoder_start_token_id: null
+  diffusion_model_cfg:
+    attention_head_dim: 48
+    dropout: 0.2
+    final_dropout: true
+    interleave_self_attention: true
+    norm_type: ada_norm
+    num_attention_heads: 32
+    num_layers: 32
+    output_dim: 1024
+    positional_embeddings: null
+  diversity_penalty: 0.0
+  do_sample: false
+  eagle_collator: true
+  early_stopping: false
+  encoder_no_repeat_ngram_size: 0
+  eos_token_id: null
+  exponential_decay_length_penalty: null
+  extra_augmentation_config: null
+  finetuning_task: null
+  forced_bos_token_id: null
+  forced_eos_token_id: null
+  id2label:
+    0: LABEL_0
+    1: LABEL_1
+  is_decoder: false
+  is_encoder_decoder: false
+  label2id:
+    LABEL_0: 0
+    LABEL_1: 1
+  length_penalty: 1.0
+  load_bf16: false
+  max_length: 20
+  min_length: 0
+  model_name: nvidia/Eagle-Block2A-2B-v2
+  no_repeat_ngram_size: 0
+  num_beam_groups: 1
+  num_beams: 1
+  num_return_sequences: 1
+  output_attentions: false
+  output_hidden_states: false
+  output_scores: false
+  pad_token_id: null
+  prefix: null
+  problem_type: null
+  pruned_heads: {}
+  random_rotation_angle: null
+  remove_invalid_values: false
+  repetition_penalty: 1.0
+  reproject_vision: false
+  return_dict: true
+  return_dict_in_generate: false
+  sep_token_id: null
+  state_dropout_prob: 0.0
+  suppress_tokens: null
+  task_specific_params: null
+  temperature: 1.0
+  tf_legacy_loss: false
+  tie_encoder_decoder: false
+  tie_word_embeddings: true
+  tokenizer_class: null
+  top_k: 50
+  top_p: 1.0
+  torch_dtype: null
+  torchscript: false
+  transformers_version: null
+  tune_diffusion_model: true
+  tune_llm: false
+  tune_projector: true
+  tune_visual: false
+  typical_p: 1.0
+  use_bfloat16: false
+  use_relative_action: true
+training: !!python/object:gr00t.configs.training.training_config.TrainingConfig
+  add_rl_callback: false
+  assert_loss_less_than: null
+  batch_size: null
+  bf16: true
+  dataloader_num_workers: 4
+  ddp_bucket_cap_mb: 100
+  deepspeed_stage: 2
+  enable_open_loop_eval: false
+  enable_profiling: false
+  eval_batch_size: 2
+  eval_bf16: true
+  eval_set_split_ratio: 0.1
+  eval_steps: 500
+  eval_strategy: 'no'
+  experiment_name: null
+  fp16: false
+  global_batch_size: 8
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 0.0001
+  logging_steps: 10
+  lr_scheduler_type: cosine
+  max_concurrent_uploads: 2
+  max_grad_norm: 1.0
+  max_retries: 3
+  max_steps: 30000
+  num_gpus: 1
+  open_loop_eval_plot_indices: null
+  open_loop_eval_steps_per_traj: 100
+  open_loop_eval_traj_ids:
+  - 0
+  optim: adamw_torch
+  output_dir: /tmp/dual_arm_finetune
+  remove_unused_columns: false
+  save_best_eval_metric_greater_is_better: true
+  save_best_eval_metric_name: ''
+  save_steps: 10000
+  save_total_limit: 5
+  save_vl_model: false
+  start_from_checkpoint: nvidia/GR00T-N1.6-3B
+  tf32: true
+  transformers_access_token: null
+  transformers_cache_dir: null
+  transformers_local_files_only: false
+  transformers_trust_remote_code: true
+  upload_checkpoints: false
+  upload_every: 1000
+  upload_last_n_checkpoints: 5
+  use_ddp: false
+  use_wandb: true
+  wandb_project: finetune-gr00t-n1d6
+  warmup_ratio: 0.05
+  warmup_steps: 0
+  weight_decay: 1.0e-05

experiment_cfg/dataset_statistics.json ADDED Viewed

The diff for this file is too large to render. See raw diff

experiment_cfg/final_model_config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "model_type": "Gr00tN1d6",
+  "model_dtype": "bfloat16",
+  "model_name": "nvidia/Eagle-Block2A-2B-v2",
+  "backbone_model_type": "eagle",
+  "model_revision": null,
+  "tune_top_llm_layers": 4,
+  "backbone_embedding_dim": 2048,
+  "tune_llm": false,
+  "tune_visual": false,
+  "select_layer": 16,
+  "reproject_vision": false,
+  "use_flash_attention": true,
+  "load_bf16": true,
+  "collator_overwrite_image_inputs": false,
+  "eagle_collator": true,
+  "backbone_trainable_params_fp32": true,
+  "extra_augmentation_config": null,
+  "apply_sincos_state_encoding": true,
+  "use_relative_action": true,
+  "max_state_dim": 128,
+  "max_action_dim": 128,
+  "action_horizon": 50,
+  "hidden_size": 1024,
+  "input_embedding_dim": 1536,
+  "add_pos_embed": true,
+  "attn_dropout": 0.2,
+  "use_vlln": true,
+  "max_seq_len": 1024,
+  "use_alternate_vl_dit": true,
+  "attend_text_every_n_blocks": 2,
+  "diffusion_model_cfg": {
+    "attention_head_dim": 48,
+    "dropout": 0.2,
+    "final_dropout": true,
+    "interleave_self_attention": true,
+    "norm_type": "ada_norm",
+    "num_attention_heads": 32,
+    "num_layers": 32,
+    "output_dim": 1024,
+    "positional_embeddings": null
+  },
+  "num_inference_timesteps": 4,
+  "noise_beta_alpha": 1.5,
+  "noise_beta_beta": 1.0,
+  "noise_s": 0.999,
+  "num_timestep_buckets": 1000,
+  "tune_projector": true,
+  "tune_diffusion_model": true,
+  "tune_vlln": true,
+  "state_dropout_prob": 0.0,
+  "state_additive_noise_scale": 0.0,
+  "max_num_embodiments": 32
+}

experiment_cfg/final_processor_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b71989517f657d378108ca865290a96c6518d6eb579a803e01b101d1b3a8de3
+size 4990120184

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f16e6383d1f5d4203084c8de633e7581b1b0882826b0d2b8e0e65ea0241ed2db
+size 4823190320

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:398d01215738f4995cdaca27ba6ebd6cc87565c13081e156c84230197c5dd14f
+size 12960193762

processor_config.json ADDED Viewed

	@@ -0,0 +1,472 @@

+{
+  "processor_class": "Gr00tN1d6Processor",
+  "processor_kwargs": {
+    "modality_configs": {
+      "behavior_r1_pro": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "observation.images.rgb.head_256_256",
+            "observation.images.rgb.left_wrist_256_256",
+            "observation.images.rgb.right_wrist_256_256"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "robot_pos",
+            "robot_ori_cos",
+            "robot_ori_sin",
+            "robot_2d_ori",
+            "robot_2d_ori_cos",
+            "robot_2d_ori_sin",
+            "robot_lin_vel",
+            "robot_ang_vel",
+            "arm_left_qpos",
+            "arm_left_qpos_sin",
+            "arm_left_qpos_cos",
+            "eef_left_pos",
+            "eef_left_quat",
+            "gripper_left_qpos",
+            "arm_right_qpos",
+            "arm_right_qpos_sin",
+            "arm_right_qpos_cos",
+            "eef_right_pos",
+            "eef_right_quat",
+            "gripper_right_qpos",
+            "trunk_qpos"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15,
+            16,
+            17,
+            18,
+            19,
+            20,
+            21,
+            22,
+            23,
+            24,
+            25,
+            26,
+            27,
+            28,
+            29,
+            30,
+            31
+          ],
+          "modality_keys": [
+            "base",
+            "torso",
+            "left_arm",
+            "left_gripper",
+            "right_arm",
+            "right_gripper"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": "trunk_qpos"
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": "arm_left_qpos"
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": "arm_right_qpos"
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "annotation.human.coarse_action"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      },
+      "gr1": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "ego_view_bg_crop_pad_res256_freq20"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "left_arm",
+            "right_arm",
+            "left_hand",
+            "right_hand",
+            "waist"
+          ],
+          "sin_cos_embedding_keys": [
+            "left_arm",
+            "right_arm",
+            "left_hand",
+            "right_hand",
+            "waist"
+          ],
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15
+          ],
+          "modality_keys": [
+            "left_arm",
+            "right_arm",
+            "left_hand",
+            "right_hand",
+            "waist"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "task"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      },
+      "robocasa_panda_omron": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "res256_image_side_0",
+            "res256_image_side_1",
+            "res256_image_wrist_0"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "end_effector_position_relative",
+            "end_effector_rotation_relative",
+            "gripper_qpos",
+            "base_position",
+            "base_rotation"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15
+          ],
+          "modality_keys": [
+            "end_effector_position",
+            "end_effector_rotation",
+            "gripper_close",
+            "base_motion",
+            "control_mode"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "annotation.human.action.task_description"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      },
+      "new_embodiment": {
+        "video": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "front",
+            "left_wrist",
+            "right_wrist"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "state": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "arm",
+            "gripper"
+          ],
+          "sin_cos_embedding_keys": [
+            "arm"
+          ],
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        },
+        "action": {
+          "delta_indices": [
+            0,
+            1,
+            2,
+            3,
+            4,
+            5,
+            6,
+            7,
+            8,
+            9,
+            10,
+            11,
+            12,
+            13,
+            14,
+            15,
+            16,
+            17,
+            18,
+            19,
+            20,
+            21,
+            22,
+            23,
+            24,
+            25,
+            26,
+            27,
+            28,
+            29
+          ],
+          "modality_keys": [
+            "arm",
+            "gripper"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": [
+            {
+              "rep": "RELATIVE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            },
+            {
+              "rep": "ABSOLUTE",
+              "type": "NON_EEF",
+              "format": "DEFAULT",
+              "state_key": null
+            }
+          ]
+        },
+        "language": {
+          "delta_indices": [
+            0
+          ],
+          "modality_keys": [
+            "annotation.task_description"
+          ],
+          "sin_cos_embedding_keys": null,
+          "mean_std_embedding_keys": null,
+          "action_configs": null
+        }
+      }
+    },
+    "image_crop_size": null,
+    "image_target_size": null,
+    "use_albumentations": true,
+    "random_rotation_angle": null,
+    "color_jitter_params": {
+      "brightness": 0.3,
+      "contrast": 0.4,
+      "saturation": 0.5,
+      "hue": 0.08
+    },
+    "shortest_image_edge": 256,
+    "crop_fraction": 0.95,
+    "model_name": "nvidia/Eagle-Block2A-2B-v2",
+    "model_type": "eagle",
+    "formalize_language": true,
+    "max_state_dim": 128,
+    "max_action_dim": 128,
+    "max_action_horizon": 50,
+    "use_percentiles": false,
+    "clip_outliers": true,
+    "apply_sincos_state_encoding": true,
+    "use_relative_action": true
+  }
+}

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5fb2227e8a1cd89f903634efd5e9ebb7dceb9f9fda5e74aae6c273a091f4de6b
+size 14645

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba235ce948d5cb5a6f13328804af602cba6d783654fb2fc347f92bf10d26c17d
+size 1465

statistics.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:13c61a51051b90d49488f163ef8219ddd962dcf06c26bf54a76242abf457cd61
+size 5713

wandb_config.json ADDED Viewed

	@@ -0,0 +1 @@


+{"project": "finetune-gr00t-n1d6", "run_id": "dual_arm_finetune"}