Commit 16dafbc (verified) · 1 parent: dd53463
yuanxuewei committed: Add files using upload-large-folder tool
checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cb1ab1785ec9ca107b1e211ba9d6e390396c475921f6b834f4d9371b137321e
+ size 8935435793
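This file is a Git LFS pointer: only the hash and size (~8.9 GB) live in the repo, and the actual weights are fetched on download. A minimal loading sketch, assuming a placeholder repo id (the repo id is not given in this commit):

```python
import torch
from huggingface_hub import hf_hub_download

# Placeholder repo id; substitute the repository this commit belongs to.
ckpt_path = hf_hub_download(
    repo_id="yuanxuewei/REPO_NAME",
    filename="checkpoints/steps_30000_pytorch_model.pt",
)
# The Hub resolves the LFS pointer to the real ~8.9 GB file; load on CPU.
state_dict = torch.load(ckpt_path, map_location="cpu")
```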
config.json ADDED
@@ -0,0 +1,115 @@
+ {
+   "run_id": "0911_libero_object_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k",
+   "run_root_dir": "./playground/Checkpoints",
+   "seed": 42,
+   "trackers": [
+     "jsonl",
+     "wandb"
+   ],
+   "wandb_entity": "michaelyu-1101-fudanuniversity",
+   "wandb_project": "Internvla",
+   "is_debug": false,
+   "framework": {
+     "framework_py": "DinoQFormerACT",
+     "qwenvl": {
+       "base_vlm": "/mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000/",
+       "attn_implementation": "flash_attention_2",
+       "vl_hidden_dim": 2048
+     },
+     "dino": {
+       "dino_backbone": "dinov2_vitl14"
+     },
+     "layer_qformer": {
+       "qformer_end_layer": 37,
+       "qformer_start_layer": 36,
+       "num_query_tokens": 64,
+       "grad_scale": 0.5
+     },
+     "action_model": {
+       "action_model_type": "DiT-B",
+       "action_hidden_dim": 768,
+       "action_dim": 7,
+       "use_ema": false,
+       "future_action_window_size": 7,
+       "past_action_window_size": 0,
+       "repeated_diffusion_steps": 8
+     },
+     "reduce_in_full_precision": true
+   },
+   "datasets": {
+     "vlm_data": {
+       "dataformat": "llava_json",
+       "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
+       "eval_dataset": "aokvqa_cauldron_llava_format",
+       "data_flatten": false,
+       "base_interval": 2,
+       "max_pixels": 50176,
+       "min_pixels": 784,
+       "fix_image_size": [
+         224,
+         224
+       ],
+       "model_max_length": 1024,
+       "model_type": "qwen2.5vl",
+       "per_device_batch_size": 4
+     },
+     "vla_data": {
+       "dataset_py": "lerobot_libero",
+       "data_root_dir": "playground/Datasets/LEROBOT_LIBERO_DATA",
+       "data_mix": "libero_object",
+       "action_type": "delta_qpos",
+       "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
+       "CoT_answer": "bbox",
+       "default_image_resolution": [
+         3,
+         224,
+         224
+       ],
+       "per_device_batch_size": 16,
+       "load_all_data_for_training": true,
+       "obs": [
+         "image_0"
+       ]
+     }
+   },
+   "trainer": {
+     "epochs": 100,
+     "max_train_steps": 100000,
+     "num_warmup_steps": 5000,
+     "save_interval": 10000,
+     "eval_interval": 1000,
+     "learning_rate": {
+       "base": 2.5e-05
+     },
+     "lr_scheduler_type": "cosine_with_min_lr",
+     "scheduler_specific_kwargs": {
+       "min_lr": 1e-06
+     },
+     "freeze_modules": "",
+     "loss_scale": {
+       "vla": 1.0,
+       "vlm": 0.1
+     },
+     "max_grad_norm": 1.0,
+     "warmup_ratio": 0.1,
+     "weight_decay": 0.0,
+     "logging_frequency": 10,
+     "gradient_clipping": 1.0,
+     "gradient_accumulation_steps": 1,
+     "optimizer": {
+       "name": "AdamW",
+       "betas": [
+         0.9,
+         0.95
+       ],
+       "eps": 1e-08,
+       "weight_decay": 1e-08
+     },
+     "is_resume": false,
+     "resume_epoch": null,
+     "resume_step": null,
+     "enable_gradient_checkpointing": true,
+     "enable_mixed_precision_training": true
+   },
+   "output_dir": "./playground/Checkpoints/0911_libero_object_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k"
+ }
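Since the config is plain JSON, it is easy to inspect programmatically. A minimal sketch reading the action-head fields above (the "action_chunk_8" in the run id corresponds to 1 current step plus future_action_window_size = 7):

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

am = cfg["framework"]["action_model"]
chunk_size = am["future_action_window_size"] + 1  # 7 future steps + current = 8
print(am["action_model_type"], am["action_dim"], chunk_size)  # DiT-B 7 8
```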
config.yaml ADDED
@@ -0,0 +1,96 @@
+ run_id: 0911_libero_object_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k
+ run_root_dir: ./playground/Checkpoints
+ seed: 42
+ trackers:
+ - jsonl
+ - wandb
+ wandb_entity: michaelyu-1101-fudanuniversity
+ wandb_project: Internvla
+ is_debug: false
+ framework:
+   framework_py: DinoQFormerACT
+   qwenvl:
+     base_vlm: /mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000/
+     attn_implementation: flash_attention_2
+     vl_hidden_dim: 2048
+   dino:
+     dino_backbone: dinov2_vitl14
+   layer_qformer:
+     qformer_end_layer: 37
+     qformer_start_layer: 36
+     num_query_tokens: 64
+     grad_scale: 0.5
+   action_model:
+     action_model_type: DiT-B
+     action_hidden_dim: 768
+     action_dim: 7
+     use_ema: false
+     future_action_window_size: 7
+     past_action_window_size: 0
+     repeated_diffusion_steps: 8
+   reduce_in_full_precision: true
+ datasets:
+   vlm_data:
+     dataformat: llava_json
+     dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
+     eval_dataset: aokvqa_cauldron_llava_format
+     data_flatten: false
+     base_interval: 2
+     max_pixels: 50176
+     min_pixels: 784
+     fix_image_size:
+     - 224
+     - 224
+     model_max_length: 1024
+     model_type: qwen2.5vl
+     per_device_batch_size: 4
+   vla_data:
+     dataset_py: lerobot_libero
+     data_root_dir: playground/Datasets/LEROBOT_LIBERO_DATA
+     data_mix: libero_object
+     action_type: delta_qpos
+     CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
+       Locate their bounding boxes in [x1,y1,x2,y2] format.
+     CoT_answer: bbox
+     default_image_resolution:
+     - 3
+     - 224
+     - 224
+     per_device_batch_size: 16
+     load_all_data_for_training: true
+     obs:
+     - image_0
+ trainer:
+   epochs: 100
+   max_train_steps: 100000
+   num_warmup_steps: 5000
+   save_interval: 10000
+   eval_interval: 1000
+   learning_rate:
+     base: 2.5e-05
+   lr_scheduler_type: cosine_with_min_lr
+   scheduler_specific_kwargs:
+     min_lr: 1.0e-06
+   freeze_modules: ''
+   loss_scale:
+     vla: 1.0
+     vlm: 0.1
+   max_grad_norm: 1.0
+   warmup_ratio: 0.1
+   weight_decay: 0.0
+   logging_frequency: 10
+   gradient_clipping: 1.0
+   gradient_accumulation_steps: 1
+   optimizer:
+     name: AdamW
+     betas:
+     - 0.9
+     - 0.95
+     eps: 1.0e-08
+     weight_decay: 1.0e-08
+   is_resume: false
+   resume_epoch: null
+   resume_step: null
+   enable_gradient_checkpointing: true
+   enable_mixed_precision_training: true
+ output_dir: ./playground/Checkpoints/0911_libero_object_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k
dataset_statistics.json ADDED
@@ -0,0 +1,133 @@
+ {
+   "franka": {
+     "action": {
+       "mean": [
+         0.07096529006958008,
+         0.13498851656913757,
+         -0.04601382836699486,
+         0.00123520044144243,
+         0.006998839322477579,
+         -0.015027612447738647,
+         0.46428999304771423
+       ],
+       "std": [
+         0.2681235373020172,
+         0.43846824765205383,
+         0.4474974274635315,
+         0.024446550756692886,
+         0.049355510622262955,
+         0.042107198387384415,
+         0.49879148602485657
+       ],
+       "max": [
+         0.9375,
+         0.8919642567634583,
+         0.9375,
+         0.17678570747375488,
+         0.35035714507102966,
+         0.1810714304447174,
+         1.0
+       ],
+       "min": [
+         -0.8839285969734192,
+         -0.9375,
+         -0.9375,
+         -0.15000000596046448,
+         -0.29035714268684387,
+         -0.32892856001853943,
+         0.0
+       ],
+       "q01": [
+         -0.5383928418159485,
+         -0.8758928775787354,
+         -0.9375,
+         -0.06964285671710968,
+         -0.11678571254014969,
+         -0.15964286029338837,
+         0.0
+       ],
+       "q99": [
+         0.8464285731315613,
+         0.84375,
+         0.9375,
+         0.08142857253551483,
+         0.14892856776714325,
+         0.0867857113480568,
+         1.0
+       ],
+       "mask": [
+         true,
+         true,
+         true,
+         true,
+         true,
+         true,
+         false
+       ]
+     },
+     "state": {
+       "mean": [
+         -0.02999030612409115,
+         -0.007947085425257683,
+         0.20293472707271576,
+         3.1086409091949463,
+         -0.21404768526554108,
+         -0.11307074874639511,
+         0.029380427673459053,
+         -0.030556727200746536
+       ],
+       "std": [
+         0.06694897264242172,
+         0.17608462274074554,
+         0.07807064801454544,
+         0.08684843033552649,
+         0.33540457487106323,
+         0.20728276669979095,
+         0.00956575945019722,
+         0.009197483770549297
+       ],
+       "max": [
+         0.14580604434013367,
+         0.33216384053230286,
+         0.3857804834842682,
+         3.4003844261169434,
+         0.7954911589622498,
+         0.6642207503318787,
+         0.04104341194033623,
+         -0.00018117300351150334
+       ],
+       "min": [
+         -0.1765444278717041,
+         -0.29457300901412964,
+         0.008128180168569088,
+         2.2890501022338867,
+         -1.883241891860962,
+         -1.0600427389144897,
+         0.0006495157140307128,
+         -0.041782498359680176
+       ],
+       "q01": [
+         -0.14911890715360643,
+         -0.25978428691625594,
+         0.009925739830359817,
+         2.7545341420173646,
+         -1.3996034812927245,
+         -0.6867720144987106,
+         0.008197814421728254,
+         -0.04015838988125324
+       ],
+       "q99": [
+         0.09063626825809479,
+         0.29066365867853167,
+         0.3370887073874472,
+         3.2611824750900267,
+         0.32092821151018125,
+         0.4037663781642913,
+         0.039891827926039694,
+         -0.009106044843792932
+       ]
+     },
+     "num_transitions": 66984,
+     "num_trajectories": 454
+   }
+ }
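dataset_statistics.json holds per-dimension action/state statistics over the 454 LIBERO-Object trajectories (66,984 transitions). Statistics like these are commonly used for q01/q99 min-max normalization of actions to [-1, 1], with the `mask` excluding the binary gripper dimension; this pipeline's exact convention isn't shown in the commit, so the sketch below is an assumption in that common style:

```python
import json
import numpy as np

stats = json.load(open("dataset_statistics.json"))["franka"]["action"]
q01 = np.array(stats["q01"])
q99 = np.array(stats["q99"])
mask = np.array(stats["mask"])  # last dim (gripper) is False: left as-is

def normalize(action: np.ndarray) -> np.ndarray:
    """Scale masked dims to [-1, 1] via the 1st/99th percentiles."""
    scaled = 2.0 * (action - q01) / (q99 - q01 + 1e-8) - 1.0
    return np.where(mask, np.clip(scaled, -1.0, 1.0), action)

def denormalize(action: np.ndarray) -> np.ndarray:
    """Invert normalize() for model outputs."""
    unscaled = 0.5 * (action + 1.0) * (q99 - q01 + 1e-8) + q01
    return np.where(mask, unscaled, action)
```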
run_lerobot_datasets.sh ADDED
@@ -0,0 +1,64 @@
+ export HF_HOME=/mnt/petrelfs/share/yejinhui/Models/huggingface_cache
+
+ export NCCL_SOCKET_IFNAME=bond0
+ export NCCL_IB_HCA=mlx5_2,mlx5_3
+
+ # For NCCL communication robustness during checkpoint saving
+ export NCCL_BLOCKING_WAIT=1
+ export NCCL_ASYNC_ERROR_HANDLING=1
+ export NCCL_TIMEOUT=1000 # timeout, in seconds
+
+ cd /mnt/petrelfs/yujunqiu/code/vla-baseline/llavavla-00hf1
+
+ # MODEL_PATH=/mnt/petrelfs/yejinhui/Projects/llavavla/playground/Pretrained_models/Qwen2.5-VL-3B-Instruct # must be a local path, since evaluation may run on another machine
+ # data_root_dir=./playground/Datasets/OXE_LEROBOT_DATASET
+ run_root_dir=./playground/Checkpoints
+ task_name=libero_object
+ run_id=0911_${task_name}_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k
+
+
+ export WANDB_MODE=disabled
+
+ output_dir=${run_root_dir}/${run_id}
+ mkdir -p ${output_dir}
+ # copy this script to the output dir
+ cp $0 ${output_dir}/
+
+ # --pretrained_checkpoint ${MODEL_PATH} \
+ # export CUDA_VISIBLE_DEVICES=4,5,6,7
+
+ # --datasets.vla_data.data_mix libero_goal \
+ # --framework.framework_py qwenpi \
+
+ DEBUG=False
+ # DEBUG=True
+
+ if [ "$DEBUG" = True ]; then
+   num_processes=1
+   run_id=debug
+ else
+   num_processes=8
+ fi
+
+
+ accelerate launch \
+   --config_file scripts/run_scripts/deepspeed_zero2.yaml \
+   --num_processes ${num_processes} \
+   llavavla/training/train_qwenvla.py \
+   --config_yaml ./llavavla/config/lerobot_data/qwenvla_cotrain_libero.yaml \
+   --datasets.vla_data.per_device_batch_size 16 \
+   --datasets.vla_data.data_mix ${task_name} \
+   --framework.action_model.future_action_window_size 7 \
+   --trainer.max_train_steps 100_000 \
+   --trainer.save_interval 10_000 \
+   --run_root_dir ${run_root_dir} \
+   --run_id ${run_id} \
+   --wandb_project Internvla \
+   --wandb_entity michaelyu-1101-fudanuniversity \
+   --is_debug ${DEBUG} \
+   --framework.qwenvl.base_vlm /mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000/
+
+ # --framework.qwenvl.base_vlm ${MODEL_PATH} \
+ # --data_root_dir ${data_root_dir} \
+
+ # --is_debug True
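The dotted flags passed to train_qwenvla.py (e.g. --datasets.vla_data.data_mix) override nested keys of the YAML config loaded via --config_yaml. The project's argument parser isn't included in this commit; a minimal sketch of how such dotted overrides are commonly merged:

```python
import yaml

def apply_override(cfg: dict, dotted_key: str, value) -> None:
    """Set a nested config key addressed by dots, e.g. 'datasets.vla_data.data_mix'."""
    *parents, leaf = dotted_key.split(".")
    node = cfg
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

apply_override(cfg, "datasets.vla_data.data_mix", "libero_object")
apply_override(cfg, "trainer.max_train_steps", 100_000)
```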
summary.jsonl ADDED
@@ -0,0 +1,10 @@
+ {"steps": 10000}
+ {"steps": 20000}
+ {"steps": 30000}
+ {"steps": 40000}
+ {"steps": 50000}
+ {"steps": 60000}
+ {"steps": 70000}
+ {"steps": 80000}
+ {"steps": 90000}
+ {"steps": 100000}