Upload folder using huggingface_hub
Browse files- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_10000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_20000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_30000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_40000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.json +151 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.yaml +130 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/dataset_statistics.json +480 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/final_model/pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/summary.jsonl +4 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_10000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_20000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_30000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_40000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.json +151 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.yaml +130 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/dataset_statistics.json +480 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/final_model/pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/summary.jsonl +4 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_10000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_20000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_30000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_40000_pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.json +151 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.yaml +130 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/dataset_statistics.json +480 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/final_model/pytorch_model.pt +3 -0
- intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/summary.jsonl +4 -0
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_10000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6eb192d58e53b54f0b010b6d43849558fb1a823b0a8c3268528204afbd6107a7
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_20000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b7acfbf138a67f3e10c7bd176b430a6f2c5cb15d078578caca4790cff361b3a
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_30000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d844e8539befdcf39f8796628db021b11e47af9087ce5a68a1e117b26951bf8
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_40000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09fb073f88283e564189108f6f6c7200cc27a1438b6165537846fb14fd62154b
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "G1WholebodyBendPick",
|
| 3 |
+
"run_root_dir": "runs/InternVLA/Checkpoints",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "jliu530-soochow-university",
|
| 10 |
+
"wandb_project": "psi",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"framework_py": "InternVLA-M1",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048
|
| 18 |
+
},
|
| 19 |
+
"dino": {
|
| 20 |
+
"dino_backbone": "dinov2_vits14"
|
| 21 |
+
},
|
| 22 |
+
"layer_qformer": {
|
| 23 |
+
"qformer_end_layer": 37,
|
| 24 |
+
"qformer_start_layer": 36,
|
| 25 |
+
"num_query_tokens": 64,
|
| 26 |
+
"input_dim": 2048,
|
| 27 |
+
"ouptput_dim": 768,
|
| 28 |
+
"grad_scale": 0.5
|
| 29 |
+
},
|
| 30 |
+
"action_model": {
|
| 31 |
+
"action_model_type": "DiT-B",
|
| 32 |
+
"action_hidden_dim": 768,
|
| 33 |
+
"action_dim": 36,
|
| 34 |
+
"use_ema": false,
|
| 35 |
+
"future_action_window_size": 15,
|
| 36 |
+
"past_action_window_size": 0,
|
| 37 |
+
"repeated_diffusion_steps": 8
|
| 38 |
+
},
|
| 39 |
+
"fm_head_config": {
|
| 40 |
+
"input_embedding_dim": 1536,
|
| 41 |
+
"hidden_size": 1024,
|
| 42 |
+
"add_pos_embed": true,
|
| 43 |
+
"max_seq_len": 1024,
|
| 44 |
+
"action_dim": 36,
|
| 45 |
+
"future_action_window_size": 15,
|
| 46 |
+
"action_horizon": 16,
|
| 47 |
+
"past_action_window_size": 0,
|
| 48 |
+
"noise_beta_alpha": 1.5,
|
| 49 |
+
"noise_beta_beta": 1.0,
|
| 50 |
+
"noise_s": 0.999,
|
| 51 |
+
"num_timestep_buckets": 1000,
|
| 52 |
+
"num_inference_timesteps": 4,
|
| 53 |
+
"num_target_vision_tokens": 32,
|
| 54 |
+
"diffusion_model_cfg": {
|
| 55 |
+
"attention_head_dim": 48,
|
| 56 |
+
"cross_attention_dim": 2048,
|
| 57 |
+
"dropout": 0.2,
|
| 58 |
+
"final_dropout": true,
|
| 59 |
+
"interleave_self_attention": true,
|
| 60 |
+
"norm_type": "ada_norm",
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_layers": 16,
|
| 63 |
+
"output_dim": 1024,
|
| 64 |
+
"positional_embeddings": null
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"datasets": {
|
| 69 |
+
"vlm_data": {
|
| 70 |
+
"dataset_py": "vlm_datasets",
|
| 71 |
+
"dataformat": "llava_json",
|
| 72 |
+
"dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
|
| 73 |
+
"eval_dataset": "aokvqa_cauldron_llava_format",
|
| 74 |
+
"data_flatten": false,
|
| 75 |
+
"base_interval": 2,
|
| 76 |
+
"max_pixels": 50176,
|
| 77 |
+
"min_pixels": 784,
|
| 78 |
+
"model_max_length": 2048,
|
| 79 |
+
"model_type": "qwen2.5vl",
|
| 80 |
+
"per_device_batch_size": 4
|
| 81 |
+
},
|
| 82 |
+
"vla_data": {
|
| 83 |
+
"dataset_py": "lerobot_datasets",
|
| 84 |
+
"data_root_dir": "/hfm/jliu/simple/G1WholebodyBendPick-v0-psi0",
|
| 85 |
+
"data_mix": "humanoid_",
|
| 86 |
+
"action_type": "abs_joints",
|
| 87 |
+
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
|
| 88 |
+
"CoT_answer": "bbox",
|
| 89 |
+
"default_image_resolution": [
|
| 90 |
+
3,
|
| 91 |
+
224,
|
| 92 |
+
224
|
| 93 |
+
],
|
| 94 |
+
"per_device_batch_size": 64,
|
| 95 |
+
"preload_all": true,
|
| 96 |
+
"load_all_data_for_training": true,
|
| 97 |
+
"obs": [
|
| 98 |
+
"image_0"
|
| 99 |
+
],
|
| 100 |
+
"image_size": [
|
| 101 |
+
224,
|
| 102 |
+
224
|
| 103 |
+
]
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"trainer": {
|
| 107 |
+
"epochs": 100,
|
| 108 |
+
"max_train_steps": 40000,
|
| 109 |
+
"num_warmup_steps": 0,
|
| 110 |
+
"save_interval": 10000,
|
| 111 |
+
"eval_interval": 100,
|
| 112 |
+
"learning_rate": {
|
| 113 |
+
"base": 5e-05,
|
| 114 |
+
"qwen_vl_interface": 1e-05,
|
| 115 |
+
"action_model": 0.0001
|
| 116 |
+
},
|
| 117 |
+
"lr_scheduler_type": "cosine_with_min_lr",
|
| 118 |
+
"scheduler_specific_kwargs": {
|
| 119 |
+
"min_lr": 5e-07
|
| 120 |
+
},
|
| 121 |
+
"freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
|
| 122 |
+
"loss_scale": {
|
| 123 |
+
"vla": 1.0,
|
| 124 |
+
"vlm": 0.1
|
| 125 |
+
},
|
| 126 |
+
"pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
|
| 127 |
+
"skip_reload_modules": "action_model",
|
| 128 |
+
"repeated_diffusion_steps": 4,
|
| 129 |
+
"max_grad_norm": 1.0,
|
| 130 |
+
"warmup_ratio": 0.1,
|
| 131 |
+
"weight_decay": 0.0,
|
| 132 |
+
"logging_frequency": 10,
|
| 133 |
+
"gradient_clipping": 1.0,
|
| 134 |
+
"gradient_accumulation_steps": 1,
|
| 135 |
+
"optimizer": {
|
| 136 |
+
"name": "AdamW",
|
| 137 |
+
"betas": [
|
| 138 |
+
0.9,
|
| 139 |
+
0.95
|
| 140 |
+
],
|
| 141 |
+
"eps": 1e-08,
|
| 142 |
+
"weight_decay": 1e-08
|
| 143 |
+
},
|
| 144 |
+
"is_resume": false,
|
| 145 |
+
"resume_epoch": null,
|
| 146 |
+
"resume_step": null,
|
| 147 |
+
"enable_gradient_checkpointing": true,
|
| 148 |
+
"enable_mixed_precision_training": true
|
| 149 |
+
},
|
| 150 |
+
"output_dir": "runs/InternVLA/Checkpoints/G1WholebodyBendPick/20260404_062517"
|
| 151 |
+
}
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.yaml
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: G1WholebodyBendPick
|
| 2 |
+
run_root_dir: runs/InternVLA/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jliu530-soochow-university
|
| 8 |
+
wandb_project: psi
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
framework_py: InternVLA-M1
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
dino:
|
| 17 |
+
dino_backbone: dinov2_vits14
|
| 18 |
+
layer_qformer:
|
| 19 |
+
qformer_end_layer: 37
|
| 20 |
+
qformer_start_layer: 36
|
| 21 |
+
num_query_tokens: 64
|
| 22 |
+
input_dim: 2048
|
| 23 |
+
ouptput_dim: 768
|
| 24 |
+
grad_scale: 0.5
|
| 25 |
+
action_model:
|
| 26 |
+
action_model_type: DiT-B
|
| 27 |
+
action_hidden_dim: 768
|
| 28 |
+
action_dim: 36
|
| 29 |
+
use_ema: false
|
| 30 |
+
future_action_window_size: 15
|
| 31 |
+
past_action_window_size: 0
|
| 32 |
+
repeated_diffusion_steps: 8
|
| 33 |
+
fm_head_config:
|
| 34 |
+
input_embedding_dim: 1536
|
| 35 |
+
hidden_size: 1024
|
| 36 |
+
add_pos_embed: true
|
| 37 |
+
max_seq_len: 1024
|
| 38 |
+
action_dim: 36
|
| 39 |
+
future_action_window_size: 15
|
| 40 |
+
action_horizon: 16
|
| 41 |
+
past_action_window_size: 0
|
| 42 |
+
noise_beta_alpha: 1.5
|
| 43 |
+
noise_beta_beta: 1.0
|
| 44 |
+
noise_s: 0.999
|
| 45 |
+
num_timestep_buckets: 1000
|
| 46 |
+
num_inference_timesteps: 4
|
| 47 |
+
num_target_vision_tokens: 32
|
| 48 |
+
diffusion_model_cfg:
|
| 49 |
+
attention_head_dim: 48
|
| 50 |
+
cross_attention_dim: 2048
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
final_dropout: true
|
| 53 |
+
interleave_self_attention: true
|
| 54 |
+
norm_type: ada_norm
|
| 55 |
+
num_attention_heads: 32
|
| 56 |
+
num_layers: 16
|
| 57 |
+
output_dim: 1024
|
| 58 |
+
positional_embeddings: null
|
| 59 |
+
datasets:
|
| 60 |
+
vlm_data:
|
| 61 |
+
dataset_py: vlm_datasets
|
| 62 |
+
dataformat: llava_json
|
| 63 |
+
dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
|
| 64 |
+
eval_dataset: aokvqa_cauldron_llava_format
|
| 65 |
+
data_flatten: false
|
| 66 |
+
base_interval: 2
|
| 67 |
+
max_pixels: 50176
|
| 68 |
+
min_pixels: 784
|
| 69 |
+
model_max_length: 2048
|
| 70 |
+
model_type: qwen2.5vl
|
| 71 |
+
per_device_batch_size: 4
|
| 72 |
+
vla_data:
|
| 73 |
+
dataset_py: lerobot_datasets
|
| 74 |
+
data_root_dir: /hfm/jliu/simple/G1WholebodyBendPick-v0-psi0
|
| 75 |
+
data_mix: humanoid_
|
| 76 |
+
action_type: abs_joints
|
| 77 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 78 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 79 |
+
CoT_answer: bbox
|
| 80 |
+
default_image_resolution:
|
| 81 |
+
- 3
|
| 82 |
+
- 224
|
| 83 |
+
- 224
|
| 84 |
+
per_device_batch_size: 64
|
| 85 |
+
preload_all: true
|
| 86 |
+
load_all_data_for_training: true
|
| 87 |
+
obs:
|
| 88 |
+
- image_0
|
| 89 |
+
image_size:
|
| 90 |
+
- 224
|
| 91 |
+
- 224
|
| 92 |
+
trainer:
|
| 93 |
+
epochs: 100
|
| 94 |
+
max_train_steps: 40000
|
| 95 |
+
num_warmup_steps: 0
|
| 96 |
+
save_interval: 10000
|
| 97 |
+
eval_interval: 100
|
| 98 |
+
learning_rate:
|
| 99 |
+
base: 5.0e-05
|
| 100 |
+
qwen_vl_interface: 1.0e-05
|
| 101 |
+
action_model: 0.0001
|
| 102 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 103 |
+
scheduler_specific_kwargs:
|
| 104 |
+
min_lr: 5.0e-07
|
| 105 |
+
freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
|
| 106 |
+
loss_scale:
|
| 107 |
+
vla: 1.0
|
| 108 |
+
vlm: 0.1
|
| 109 |
+
pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
|
| 110 |
+
skip_reload_modules: action_model
|
| 111 |
+
repeated_diffusion_steps: 4
|
| 112 |
+
max_grad_norm: 1.0
|
| 113 |
+
warmup_ratio: 0.1
|
| 114 |
+
weight_decay: 0.0
|
| 115 |
+
logging_frequency: 10
|
| 116 |
+
gradient_clipping: 1.0
|
| 117 |
+
gradient_accumulation_steps: 1
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
is_resume: false
|
| 126 |
+
resume_epoch: null
|
| 127 |
+
resume_step: null
|
| 128 |
+
enable_gradient_checkpointing: true
|
| 129 |
+
enable_mixed_precision_training: true
|
| 130 |
+
output_dir: runs/InternVLA/Checkpoints/G1WholebodyBendPick/20260404_062517
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/dataset_statistics.json
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.0008055749931372702,
|
| 6 |
+
0.003662512404844165,
|
| 7 |
+
0.031012117862701416,
|
| 8 |
+
-0.012596862390637398,
|
| 9 |
+
-0.027946343645453453,
|
| 10 |
+
-0.023698121309280396,
|
| 11 |
+
-0.03487454727292061,
|
| 12 |
+
0.1510307490825653,
|
| 13 |
+
-0.10410650074481964,
|
| 14 |
+
-0.16438010334968567,
|
| 15 |
+
0.160273939371109,
|
| 16 |
+
0.38479897379875183,
|
| 17 |
+
0.22493480145931244,
|
| 18 |
+
0.29762107133865356,
|
| 19 |
+
-0.020675089210271835,
|
| 20 |
+
-0.011319637298583984,
|
| 21 |
+
-0.02104736864566803,
|
| 22 |
+
-0.0047059389762580395,
|
| 23 |
+
-0.10232270509004593,
|
| 24 |
+
-0.08670033514499664,
|
| 25 |
+
-0.052605174481868744,
|
| 26 |
+
-0.1269960105419159,
|
| 27 |
+
-0.23249554634094238,
|
| 28 |
+
-0.022764088585972786,
|
| 29 |
+
0.04807743430137634,
|
| 30 |
+
0.058973122388124466,
|
| 31 |
+
-0.11149747669696808,
|
| 32 |
+
0.25905102491378784,
|
| 33 |
+
0.08001509308815002,
|
| 34 |
+
0.16946518421173096,
|
| 35 |
+
0.008698188699781895,
|
| 36 |
+
0.4670487940311432,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0,
|
| 39 |
+
0.0,
|
| 40 |
+
0.0
|
| 41 |
+
],
|
| 42 |
+
"std": [
|
| 43 |
+
0.026223942637443542,
|
| 44 |
+
0.026004673913121223,
|
| 45 |
+
0.05157838761806488,
|
| 46 |
+
0.028955502435564995,
|
| 47 |
+
0.04606349393725395,
|
| 48 |
+
0.04066930338740349,
|
| 49 |
+
0.05465922877192497,
|
| 50 |
+
0.16792035102844238,
|
| 51 |
+
0.14840462803840637,
|
| 52 |
+
0.20773763954639435,
|
| 53 |
+
0.201939195394516,
|
| 54 |
+
0.43836456537246704,
|
| 55 |
+
0.2673484683036804,
|
| 56 |
+
0.3443882465362549,
|
| 57 |
+
0.028416506946086884,
|
| 58 |
+
0.021467505022883415,
|
| 59 |
+
0.032287437468767166,
|
| 60 |
+
0.03757817670702934,
|
| 61 |
+
0.11277986317873001,
|
| 62 |
+
0.09554938971996307,
|
| 63 |
+
0.06037674844264984,
|
| 64 |
+
0.15188516676425934,
|
| 65 |
+
0.2564069926738739,
|
| 66 |
+
0.05658402293920517,
|
| 67 |
+
0.06496992707252502,
|
| 68 |
+
0.07428737729787827,
|
| 69 |
+
0.12488652765750885,
|
| 70 |
+
0.28678208589553833,
|
| 71 |
+
0.13087092339992523,
|
| 72 |
+
0.15007734298706055,
|
| 73 |
+
0.05011340230703354,
|
| 74 |
+
0.06933368742465973,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0
|
| 79 |
+
],
|
| 80 |
+
"max": [
|
| 81 |
+
0.12923675775527954,
|
| 82 |
+
0.10485697537660599,
|
| 83 |
+
0.22624923288822174,
|
| 84 |
+
0.022101346403360367,
|
| 85 |
+
0.03615603595972061,
|
| 86 |
+
0.009702185168862343,
|
| 87 |
+
0.008694176562130451,
|
| 88 |
+
0.38237157464027405,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0,
|
| 91 |
+
0.6359286308288574,
|
| 92 |
+
1.1968196630477905,
|
| 93 |
+
0.7969403266906738,
|
| 94 |
+
0.9798202514648438,
|
| 95 |
+
0.02855294942855835,
|
| 96 |
+
0.09991803765296936,
|
| 97 |
+
0.05098697543144226,
|
| 98 |
+
0.1499277651309967,
|
| 99 |
+
3.8650854548905045e-05,
|
| 100 |
+
0.0012129372917115688,
|
| 101 |
+
0.006128575652837753,
|
| 102 |
+
0.01944165676832199,
|
| 103 |
+
0.00040899173473007977,
|
| 104 |
+
0.19534528255462646,
|
| 105 |
+
0.298308402299881,
|
| 106 |
+
0.3513643741607666,
|
| 107 |
+
0.0012087320210412145,
|
| 108 |
+
0.8875377178192139,
|
| 109 |
+
0.3364854156970978,
|
| 110 |
+
0.4499310553073883,
|
| 111 |
+
0.18629509210586548,
|
| 112 |
+
0.75,
|
| 113 |
+
0.0,
|
| 114 |
+
0.0,
|
| 115 |
+
0.0,
|
| 116 |
+
0.0
|
| 117 |
+
],
|
| 118 |
+
"min": [
|
| 119 |
+
-0.09775445610284805,
|
| 120 |
+
-0.0823368951678276,
|
| 121 |
+
-0.027741333469748497,
|
| 122 |
+
-0.1810884326696396,
|
| 123 |
+
-0.21405744552612305,
|
| 124 |
+
-0.20122334361076355,
|
| 125 |
+
-0.295552521944046,
|
| 126 |
+
-0.015388990752398968,
|
| 127 |
+
-0.49568232893943787,
|
| 128 |
+
-0.6491441130638123,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0,
|
| 133 |
+
-0.11302866041660309,
|
| 134 |
+
-0.06130801886320114,
|
| 135 |
+
-0.2347739338874817,
|
| 136 |
+
-0.149064838886261,
|
| 137 |
+
-0.34618079662323,
|
| 138 |
+
-0.296110063791275,
|
| 139 |
+
-0.18200430274009705,
|
| 140 |
+
-0.5555615425109863,
|
| 141 |
+
-0.7934529185295105,
|
| 142 |
+
-0.3188854157924652,
|
| 143 |
+
-0.09826192259788513,
|
| 144 |
+
-3.864927566610277e-05,
|
| 145 |
+
-0.406229704618454,
|
| 146 |
+
0.0,
|
| 147 |
+
-0.0435166172683239,
|
| 148 |
+
-0.014203691855072975,
|
| 149 |
+
-0.05216570198535919,
|
| 150 |
+
0.44999998807907104,
|
| 151 |
+
0.0,
|
| 152 |
+
0.0,
|
| 153 |
+
0.0,
|
| 154 |
+
0.0
|
| 155 |
+
],
|
| 156 |
+
"q01": [
|
| 157 |
+
-0.044115488231182096,
|
| 158 |
+
-0.05932579189538956,
|
| 159 |
+
-0.0008548528398387134,
|
| 160 |
+
-0.14063991606235504,
|
| 161 |
+
-0.18160852789878845,
|
| 162 |
+
-0.13885661959648132,
|
| 163 |
+
-0.2704181373119354,
|
| 164 |
+
-0.015381569974124432,
|
| 165 |
+
-0.49547951817512514,
|
| 166 |
+
-0.6489888429641724,
|
| 167 |
+
0.0,
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0,
|
| 171 |
+
-0.09533142298460007,
|
| 172 |
+
-0.057845381833612916,
|
| 173 |
+
-0.11222188174724579,
|
| 174 |
+
-0.12452998012304306,
|
| 175 |
+
-0.33181595951318743,
|
| 176 |
+
-0.26951175928115845,
|
| 177 |
+
-0.16966578140854835,
|
| 178 |
+
-0.504463392496109,
|
| 179 |
+
-0.7292503118515015,
|
| 180 |
+
-0.21955281451344488,
|
| 181 |
+
-0.03430207073688507,
|
| 182 |
+
-3.847765765385702e-05,
|
| 183 |
+
-0.3624899685382843,
|
| 184 |
+
0.0,
|
| 185 |
+
-0.04341023042798042,
|
| 186 |
+
-0.01279706321656704,
|
| 187 |
+
-0.03870870623737574,
|
| 188 |
+
0.44999998807907104,
|
| 189 |
+
0.0,
|
| 190 |
+
0.0,
|
| 191 |
+
0.0,
|
| 192 |
+
0.0
|
| 193 |
+
],
|
| 194 |
+
"q99": [
|
| 195 |
+
0.08541888743638992,
|
| 196 |
+
0.08673391491174698,
|
| 197 |
+
0.20383067429065704,
|
| 198 |
+
0.010909304022789001,
|
| 199 |
+
0.005769035266712276,
|
| 200 |
+
0.007609221618622541,
|
| 201 |
+
0.0008619268686742959,
|
| 202 |
+
0.382364958524704,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0,
|
| 205 |
+
0.6356749534606934,
|
| 206 |
+
1.1965277194976807,
|
| 207 |
+
0.7967880129814148,
|
| 208 |
+
0.9795168793201446,
|
| 209 |
+
0.010912260971963406,
|
| 210 |
+
0.04415091205388274,
|
| 211 |
+
0.02633006187155817,
|
| 212 |
+
0.06915374681353538,
|
| 213 |
+
3.84151644539088e-05,
|
| 214 |
+
0.001203537336550653,
|
| 215 |
+
3.166658199916128e-05,
|
| 216 |
+
0.002674137009307742,
|
| 217 |
+
0.000406751612899825,
|
| 218 |
+
0.12524892970919546,
|
| 219 |
+
0.21630814880132343,
|
| 220 |
+
0.2785507142543793,
|
| 221 |
+
0.0011997524416074157,
|
| 222 |
+
0.847134844660759,
|
| 223 |
+
0.3044067323207855,
|
| 224 |
+
0.43886008858680725,
|
| 225 |
+
0.13876871764659882,
|
| 226 |
+
0.75,
|
| 227 |
+
0.0,
|
| 228 |
+
0.0,
|
| 229 |
+
0.0,
|
| 230 |
+
0.0
|
| 231 |
+
],
|
| 232 |
+
"mask": [
|
| 233 |
+
true,
|
| 234 |
+
true,
|
| 235 |
+
true,
|
| 236 |
+
true,
|
| 237 |
+
true,
|
| 238 |
+
true,
|
| 239 |
+
true,
|
| 240 |
+
true,
|
| 241 |
+
true,
|
| 242 |
+
true,
|
| 243 |
+
true,
|
| 244 |
+
true,
|
| 245 |
+
true,
|
| 246 |
+
true,
|
| 247 |
+
true,
|
| 248 |
+
true,
|
| 249 |
+
true,
|
| 250 |
+
true,
|
| 251 |
+
true,
|
| 252 |
+
true,
|
| 253 |
+
true,
|
| 254 |
+
true,
|
| 255 |
+
true,
|
| 256 |
+
true,
|
| 257 |
+
true,
|
| 258 |
+
true,
|
| 259 |
+
true,
|
| 260 |
+
true,
|
| 261 |
+
true,
|
| 262 |
+
true,
|
| 263 |
+
true,
|
| 264 |
+
true,
|
| 265 |
+
true,
|
| 266 |
+
true,
|
| 267 |
+
true,
|
| 268 |
+
true
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"state": {
|
| 272 |
+
"mean": [
|
| 273 |
+
-0.0054089887998998165,
|
| 274 |
+
0.005827104672789574,
|
| 275 |
+
0.031063664704561234,
|
| 276 |
+
-0.013071244582533836,
|
| 277 |
+
-0.027740946039557457,
|
| 278 |
+
-0.024281593039631844,
|
| 279 |
+
-0.03443169221282005,
|
| 280 |
+
0.14551377296447754,
|
| 281 |
+
-0.04790256544947624,
|
| 282 |
+
-0.16233190894126892,
|
| 283 |
+
0.12968285381793976,
|
| 284 |
+
0.37486323714256287,
|
| 285 |
+
0.1825636923313141,
|
| 286 |
+
0.28982678055763245,
|
| 287 |
+
-0.01859004981815815,
|
| 288 |
+
-0.011797229759395123,
|
| 289 |
+
-0.02136491984128952,
|
| 290 |
+
-0.0015487042255699635,
|
| 291 |
+
-0.10206520557403564,
|
| 292 |
+
-0.08541297912597656,
|
| 293 |
+
-0.05251696705818176,
|
| 294 |
+
-0.12422259151935577,
|
| 295 |
+
-0.23228920996189117,
|
| 296 |
+
-0.023111552000045776,
|
| 297 |
+
0.05130844563245773,
|
| 298 |
+
0.05851978436112404,
|
| 299 |
+
-0.10995743423700333,
|
| 300 |
+
0.2582551836967468,
|
| 301 |
+
0.0,
|
| 302 |
+
-0.1490357518196106,
|
| 303 |
+
0.0,
|
| 304 |
+
0.4670488238334656
|
| 305 |
+
],
|
| 306 |
+
"std": [
|
| 307 |
+
0.026754410937428474,
|
| 308 |
+
0.02570926956832409,
|
| 309 |
+
0.05101049691438675,
|
| 310 |
+
0.02832541987299919,
|
| 311 |
+
0.04559871181845665,
|
| 312 |
+
0.04026580974459648,
|
| 313 |
+
0.05430656298995018,
|
| 314 |
+
0.17061059176921844,
|
| 315 |
+
0.056338630616664886,
|
| 316 |
+
0.20902083814144135,
|
| 317 |
+
0.14012272655963898,
|
| 318 |
+
0.4300926923751831,
|
| 319 |
+
0.19461630284786224,
|
| 320 |
+
0.3383548855781555,
|
| 321 |
+
0.02924812026321888,
|
| 322 |
+
0.021750465035438538,
|
| 323 |
+
0.032564036548137665,
|
| 324 |
+
0.03753108158707619,
|
| 325 |
+
0.11267656087875366,
|
| 326 |
+
0.09539038687944412,
|
| 327 |
+
0.0603468157351017,
|
| 328 |
+
0.15201117098331451,
|
| 329 |
+
0.2566412389278412,
|
| 330 |
+
0.05685749650001526,
|
| 331 |
+
0.06506538391113281,
|
| 332 |
+
0.07393182814121246,
|
| 333 |
+
0.1244540736079216,
|
| 334 |
+
0.28631067276000977,
|
| 335 |
+
0.0,
|
| 336 |
+
0.012014704756438696,
|
| 337 |
+
0.0,
|
| 338 |
+
0.06933368742465973
|
| 339 |
+
],
|
| 340 |
+
"max": [
|
| 341 |
+
0.12999998033046722,
|
| 342 |
+
0.10700006783008575,
|
| 343 |
+
0.2280000001192093,
|
| 344 |
+
0.0,
|
| 345 |
+
0.0,
|
| 346 |
+
0.0,
|
| 347 |
+
0.0,
|
| 348 |
+
0.5130000114440918,
|
| 349 |
+
0.1720000058412552,
|
| 350 |
+
-0.0010000000474974513,
|
| 351 |
+
0.4450001120567322,
|
| 352 |
+
1.375,
|
| 353 |
+
0.5569999814033508,
|
| 354 |
+
1.1579999923706055,
|
| 355 |
+
0.031000027433037758,
|
| 356 |
+
0.09900011122226715,
|
| 357 |
+
0.05100004002451897,
|
| 358 |
+
0.15700000524520874,
|
| 359 |
+
0.0,
|
| 360 |
+
0.003000000026077032,
|
| 361 |
+
0.006000017747282982,
|
| 362 |
+
0.020999999716877937,
|
| 363 |
+
0.0010000000474974513,
|
| 364 |
+
0.20200000703334808,
|
| 365 |
+
0.31200000643730164,
|
| 366 |
+
0.35199999809265137,
|
| 367 |
+
0.003000000026077032,
|
| 368 |
+
0.8899999856948853,
|
| 369 |
+
0.0,
|
| 370 |
+
0.0,
|
| 371 |
+
0.0,
|
| 372 |
+
0.75
|
| 373 |
+
],
|
| 374 |
+
"min": [
|
| 375 |
+
-0.09799999743700027,
|
| 376 |
+
-0.08199996501207352,
|
| 377 |
+
-2.3887020139667925e-10,
|
| 378 |
+
-0.18299999833106995,
|
| 379 |
+
-0.22200000286102295,
|
| 380 |
+
-0.21500006318092346,
|
| 381 |
+
-0.29699963331222534,
|
| 382 |
+
-0.01600000075995922,
|
| 383 |
+
-0.20600000023841858,
|
| 384 |
+
-0.8199999928474426,
|
| 385 |
+
0.004000000189989805,
|
| 386 |
+
0.0010000000474974513,
|
| 387 |
+
0.0010000000474974513,
|
| 388 |
+
0.0010000000474974513,
|
| 389 |
+
-0.1139998808503151,
|
| 390 |
+
-0.06499999761581421,
|
| 391 |
+
-0.2379997819662094,
|
| 392 |
+
-0.14899970591068268,
|
| 393 |
+
-0.34599998593330383,
|
| 394 |
+
-0.29499998688697815,
|
| 395 |
+
-0.18199999630451202,
|
| 396 |
+
-0.5590000152587891,
|
| 397 |
+
-0.800000011920929,
|
| 398 |
+
-0.32399997115135193,
|
| 399 |
+
-0.09600000083446503,
|
| 400 |
+
0.0,
|
| 401 |
+
-0.4069998264312744,
|
| 402 |
+
0.0,
|
| 403 |
+
0.0,
|
| 404 |
+
-0.15000000596046448,
|
| 405 |
+
0.0,
|
| 406 |
+
0.44999998807907104
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
-0.04413001623004675,
|
| 410 |
+
-0.05999999865889549,
|
| 411 |
+
0.0,
|
| 412 |
+
-0.14100000262260437,
|
| 413 |
+
-0.1809999942779541,
|
| 414 |
+
-0.13899989426136017,
|
| 415 |
+
-0.2699997127056122,
|
| 416 |
+
-0.014999999664723873,
|
| 417 |
+
-0.15199999511241913,
|
| 418 |
+
-0.7352600109577179,
|
| 419 |
+
0.004000000189989805,
|
| 420 |
+
0.0010000000474974513,
|
| 421 |
+
0.004000000189989805,
|
| 422 |
+
0.0010000000474974513,
|
| 423 |
+
-0.09499987408518791,
|
| 424 |
+
-0.05999991346150637,
|
| 425 |
+
-0.11399985201656819,
|
| 426 |
+
-0.12030089475214481,
|
| 427 |
+
-0.3310000002384186,
|
| 428 |
+
-0.26899999380111694,
|
| 429 |
+
-0.17000000178813934,
|
| 430 |
+
-0.49499997884035113,
|
| 431 |
+
-0.7279999852180481,
|
| 432 |
+
-0.2192599435150623,
|
| 433 |
+
-0.031000016499310733,
|
| 434 |
+
0.0,
|
| 435 |
+
-0.36000001430511475,
|
| 436 |
+
0.0,
|
| 437 |
+
0.0,
|
| 438 |
+
-0.15000000596046448,
|
| 439 |
+
0.0,
|
| 440 |
+
0.44999998807907104
|
| 441 |
+
],
|
| 442 |
+
"q99": [
|
| 443 |
+
0.08499999344348907,
|
| 444 |
+
0.0870000347495079,
|
| 445 |
+
0.20399999618530273,
|
| 446 |
+
0.0,
|
| 447 |
+
0.0,
|
| 448 |
+
0.0,
|
| 449 |
+
0.0,
|
| 450 |
+
0.39899998903274536,
|
| 451 |
+
0.06999966129660606,
|
| 452 |
+
-0.0010000000474974513,
|
| 453 |
+
0.41100001335144043,
|
| 454 |
+
1.2691300439834587,
|
| 455 |
+
0.49799999594688416,
|
| 456 |
+
1.0709999799728394,
|
| 457 |
+
0.010999999940395355,
|
| 458 |
+
0.0430000014603138,
|
| 459 |
+
0.02513002794235865,
|
| 460 |
+
0.07100000232458115,
|
| 461 |
+
0.0,
|
| 462 |
+
0.0020000000949949026,
|
| 463 |
+
0.0,
|
| 464 |
+
0.00800000037997961,
|
| 465 |
+
0.0010000000474974513,
|
| 466 |
+
0.12513000026345172,
|
| 467 |
+
0.22699999809265137,
|
| 468 |
+
0.27900001406669617,
|
| 469 |
+
0.0020000000949949026,
|
| 470 |
+
0.8460000157356262,
|
| 471 |
+
0.0,
|
| 472 |
+
-0.15000000596046448,
|
| 473 |
+
0.0,
|
| 474 |
+
0.75
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"num_transitions": 15488,
|
| 478 |
+
"num_trajectories": 100
|
| 479 |
+
}
|
| 480 |
+
}
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6d4b5d78ab54906c896a0bdec9d3f4a1acee68967f7de3e97d4248a621f895b
|
| 3 |
+
size 8604557774
|
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/summary.jsonl
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 10000}
|
| 2 |
+
{"steps": 20000}
|
| 3 |
+
{"steps": 30000}
|
| 4 |
+
{"steps": 40000}
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_10000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f7076e5a04ec649ad9a69338a77ec461423adad28c3df8aa9edbc43509609ac
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_20000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:000f727fc7ee8f52446c0e213b22b610470898803f973c4a51efb4f5b4a1960e
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_30000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9d468e3720d06c4d12dda0c639d8d3c091a10b70f627fc884869b682fe8dac4
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_40000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af0861c89f79bae652d478bc7d141061ea25d2afc78a28dd221e122f8166cab1
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "G1WholebodyTabletopGrasp",
|
| 3 |
+
"run_root_dir": "runs/InternVLA/Checkpoints",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "jliu530-soochow-university",
|
| 10 |
+
"wandb_project": "psi",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"framework_py": "InternVLA-M1",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048
|
| 18 |
+
},
|
| 19 |
+
"dino": {
|
| 20 |
+
"dino_backbone": "dinov2_vits14"
|
| 21 |
+
},
|
| 22 |
+
"layer_qformer": {
|
| 23 |
+
"qformer_end_layer": 37,
|
| 24 |
+
"qformer_start_layer": 36,
|
| 25 |
+
"num_query_tokens": 64,
|
| 26 |
+
"input_dim": 2048,
|
| 27 |
+
"ouptput_dim": 768,
|
| 28 |
+
"grad_scale": 0.5
|
| 29 |
+
},
|
| 30 |
+
"action_model": {
|
| 31 |
+
"action_model_type": "DiT-B",
|
| 32 |
+
"action_hidden_dim": 768,
|
| 33 |
+
"action_dim": 36,
|
| 34 |
+
"use_ema": false,
|
| 35 |
+
"future_action_window_size": 15,
|
| 36 |
+
"past_action_window_size": 0,
|
| 37 |
+
"repeated_diffusion_steps": 8
|
| 38 |
+
},
|
| 39 |
+
"fm_head_config": {
|
| 40 |
+
"input_embedding_dim": 1536,
|
| 41 |
+
"hidden_size": 1024,
|
| 42 |
+
"add_pos_embed": true,
|
| 43 |
+
"max_seq_len": 1024,
|
| 44 |
+
"action_dim": 36,
|
| 45 |
+
"future_action_window_size": 15,
|
| 46 |
+
"action_horizon": 16,
|
| 47 |
+
"past_action_window_size": 0,
|
| 48 |
+
"noise_beta_alpha": 1.5,
|
| 49 |
+
"noise_beta_beta": 1.0,
|
| 50 |
+
"noise_s": 0.999,
|
| 51 |
+
"num_timestep_buckets": 1000,
|
| 52 |
+
"num_inference_timesteps": 4,
|
| 53 |
+
"num_target_vision_tokens": 32,
|
| 54 |
+
"diffusion_model_cfg": {
|
| 55 |
+
"attention_head_dim": 48,
|
| 56 |
+
"cross_attention_dim": 2048,
|
| 57 |
+
"dropout": 0.2,
|
| 58 |
+
"final_dropout": true,
|
| 59 |
+
"interleave_self_attention": true,
|
| 60 |
+
"norm_type": "ada_norm",
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_layers": 16,
|
| 63 |
+
"output_dim": 1024,
|
| 64 |
+
"positional_embeddings": null
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"datasets": {
|
| 69 |
+
"vlm_data": {
|
| 70 |
+
"dataset_py": "vlm_datasets",
|
| 71 |
+
"dataformat": "llava_json",
|
| 72 |
+
"dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
|
| 73 |
+
"eval_dataset": "aokvqa_cauldron_llava_format",
|
| 74 |
+
"data_flatten": false,
|
| 75 |
+
"base_interval": 2,
|
| 76 |
+
"max_pixels": 50176,
|
| 77 |
+
"min_pixels": 784,
|
| 78 |
+
"model_max_length": 2048,
|
| 79 |
+
"model_type": "qwen2.5vl",
|
| 80 |
+
"per_device_batch_size": 4
|
| 81 |
+
},
|
| 82 |
+
"vla_data": {
|
| 83 |
+
"dataset_py": "lerobot_datasets",
|
| 84 |
+
"data_root_dir": "/hfm/jliu/simple/G1WholebodyTabletopGrasp-v0",
|
| 85 |
+
"data_mix": "humanoid_",
|
| 86 |
+
"action_type": "abs_joints",
|
| 87 |
+
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
|
| 88 |
+
"CoT_answer": "bbox",
|
| 89 |
+
"default_image_resolution": [
|
| 90 |
+
3,
|
| 91 |
+
224,
|
| 92 |
+
224
|
| 93 |
+
],
|
| 94 |
+
"per_device_batch_size": 64,
|
| 95 |
+
"preload_all": true,
|
| 96 |
+
"load_all_data_for_training": true,
|
| 97 |
+
"obs": [
|
| 98 |
+
"image_0"
|
| 99 |
+
],
|
| 100 |
+
"image_size": [
|
| 101 |
+
224,
|
| 102 |
+
224
|
| 103 |
+
]
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"trainer": {
|
| 107 |
+
"epochs": 100,
|
| 108 |
+
"max_train_steps": 40000,
|
| 109 |
+
"num_warmup_steps": 0,
|
| 110 |
+
"save_interval": 10000,
|
| 111 |
+
"eval_interval": 100,
|
| 112 |
+
"learning_rate": {
|
| 113 |
+
"base": 5e-05,
|
| 114 |
+
"qwen_vl_interface": 1e-05,
|
| 115 |
+
"action_model": 0.0001
|
| 116 |
+
},
|
| 117 |
+
"lr_scheduler_type": "cosine_with_min_lr",
|
| 118 |
+
"scheduler_specific_kwargs": {
|
| 119 |
+
"min_lr": 5e-07
|
| 120 |
+
},
|
| 121 |
+
"freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
|
| 122 |
+
"loss_scale": {
|
| 123 |
+
"vla": 1.0,
|
| 124 |
+
"vlm": 0.1
|
| 125 |
+
},
|
| 126 |
+
"pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
|
| 127 |
+
"skip_reload_modules": "action_model",
|
| 128 |
+
"repeated_diffusion_steps": 4,
|
| 129 |
+
"max_grad_norm": 1.0,
|
| 130 |
+
"warmup_ratio": 0.1,
|
| 131 |
+
"weight_decay": 0.0,
|
| 132 |
+
"logging_frequency": 10,
|
| 133 |
+
"gradient_clipping": 1.0,
|
| 134 |
+
"gradient_accumulation_steps": 1,
|
| 135 |
+
"optimizer": {
|
| 136 |
+
"name": "AdamW",
|
| 137 |
+
"betas": [
|
| 138 |
+
0.9,
|
| 139 |
+
0.95
|
| 140 |
+
],
|
| 141 |
+
"eps": 1e-08,
|
| 142 |
+
"weight_decay": 1e-08
|
| 143 |
+
},
|
| 144 |
+
"is_resume": false,
|
| 145 |
+
"resume_epoch": null,
|
| 146 |
+
"resume_step": null,
|
| 147 |
+
"enable_gradient_checkpointing": true,
|
| 148 |
+
"enable_mixed_precision_training": true
|
| 149 |
+
},
|
| 150 |
+
"output_dir": "runs/InternVLA/Checkpoints/G1WholebodyTabletopGrasp/20260404_061207"
|
| 151 |
+
}
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.yaml
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: G1WholebodyTabletopGrasp
|
| 2 |
+
run_root_dir: runs/InternVLA/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jliu530-soochow-university
|
| 8 |
+
wandb_project: psi
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
framework_py: InternVLA-M1
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
dino:
|
| 17 |
+
dino_backbone: dinov2_vits14
|
| 18 |
+
layer_qformer:
|
| 19 |
+
qformer_end_layer: 37
|
| 20 |
+
qformer_start_layer: 36
|
| 21 |
+
num_query_tokens: 64
|
| 22 |
+
input_dim: 2048
|
| 23 |
+
ouptput_dim: 768
|
| 24 |
+
grad_scale: 0.5
|
| 25 |
+
action_model:
|
| 26 |
+
action_model_type: DiT-B
|
| 27 |
+
action_hidden_dim: 768
|
| 28 |
+
action_dim: 36
|
| 29 |
+
use_ema: false
|
| 30 |
+
future_action_window_size: 15
|
| 31 |
+
past_action_window_size: 0
|
| 32 |
+
repeated_diffusion_steps: 8
|
| 33 |
+
fm_head_config:
|
| 34 |
+
input_embedding_dim: 1536
|
| 35 |
+
hidden_size: 1024
|
| 36 |
+
add_pos_embed: true
|
| 37 |
+
max_seq_len: 1024
|
| 38 |
+
action_dim: 36
|
| 39 |
+
future_action_window_size: 15
|
| 40 |
+
action_horizon: 16
|
| 41 |
+
past_action_window_size: 0
|
| 42 |
+
noise_beta_alpha: 1.5
|
| 43 |
+
noise_beta_beta: 1.0
|
| 44 |
+
noise_s: 0.999
|
| 45 |
+
num_timestep_buckets: 1000
|
| 46 |
+
num_inference_timesteps: 4
|
| 47 |
+
num_target_vision_tokens: 32
|
| 48 |
+
diffusion_model_cfg:
|
| 49 |
+
attention_head_dim: 48
|
| 50 |
+
cross_attention_dim: 2048
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
final_dropout: true
|
| 53 |
+
interleave_self_attention: true
|
| 54 |
+
norm_type: ada_norm
|
| 55 |
+
num_attention_heads: 32
|
| 56 |
+
num_layers: 16
|
| 57 |
+
output_dim: 1024
|
| 58 |
+
positional_embeddings: null
|
| 59 |
+
datasets:
|
| 60 |
+
vlm_data:
|
| 61 |
+
dataset_py: vlm_datasets
|
| 62 |
+
dataformat: llava_json
|
| 63 |
+
dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
|
| 64 |
+
eval_dataset: aokvqa_cauldron_llava_format
|
| 65 |
+
data_flatten: false
|
| 66 |
+
base_interval: 2
|
| 67 |
+
max_pixels: 50176
|
| 68 |
+
min_pixels: 784
|
| 69 |
+
model_max_length: 2048
|
| 70 |
+
model_type: qwen2.5vl
|
| 71 |
+
per_device_batch_size: 4
|
| 72 |
+
vla_data:
|
| 73 |
+
dataset_py: lerobot_datasets
|
| 74 |
+
data_root_dir: /hfm/jliu/simple/G1WholebodyTabletopGrasp-v0
|
| 75 |
+
data_mix: humanoid_
|
| 76 |
+
action_type: abs_joints
|
| 77 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 78 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 79 |
+
CoT_answer: bbox
|
| 80 |
+
default_image_resolution:
|
| 81 |
+
- 3
|
| 82 |
+
- 224
|
| 83 |
+
- 224
|
| 84 |
+
per_device_batch_size: 64
|
| 85 |
+
preload_all: true
|
| 86 |
+
load_all_data_for_training: true
|
| 87 |
+
obs:
|
| 88 |
+
- image_0
|
| 89 |
+
image_size:
|
| 90 |
+
- 224
|
| 91 |
+
- 224
|
| 92 |
+
trainer:
|
| 93 |
+
epochs: 100
|
| 94 |
+
max_train_steps: 40000
|
| 95 |
+
num_warmup_steps: 0
|
| 96 |
+
save_interval: 10000
|
| 97 |
+
eval_interval: 100
|
| 98 |
+
learning_rate:
|
| 99 |
+
base: 5.0e-05
|
| 100 |
+
qwen_vl_interface: 1.0e-05
|
| 101 |
+
action_model: 0.0001
|
| 102 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 103 |
+
scheduler_specific_kwargs:
|
| 104 |
+
min_lr: 5.0e-07
|
| 105 |
+
freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
|
| 106 |
+
loss_scale:
|
| 107 |
+
vla: 1.0
|
| 108 |
+
vlm: 0.1
|
| 109 |
+
pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
|
| 110 |
+
skip_reload_modules: action_model
|
| 111 |
+
repeated_diffusion_steps: 4
|
| 112 |
+
max_grad_norm: 1.0
|
| 113 |
+
warmup_ratio: 0.1
|
| 114 |
+
weight_decay: 0.0
|
| 115 |
+
logging_frequency: 10
|
| 116 |
+
gradient_clipping: 1.0
|
| 117 |
+
gradient_accumulation_steps: 1
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
is_resume: false
|
| 126 |
+
resume_epoch: null
|
| 127 |
+
resume_step: null
|
| 128 |
+
enable_gradient_checkpointing: true
|
| 129 |
+
enable_mixed_precision_training: true
|
| 130 |
+
output_dir: runs/InternVLA/Checkpoints/G1WholebodyTabletopGrasp/20260404_061207
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/dataset_statistics.json
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
-0.007915518246591091,
|
| 6 |
+
0.0185376163572073,
|
| 7 |
+
0.05745560675859451,
|
| 8 |
+
-0.048222675919532776,
|
| 9 |
+
-0.05002971738576889,
|
| 10 |
+
-0.04234893247485161,
|
| 11 |
+
-0.044687654823064804,
|
| 12 |
+
0.30921489000320435,
|
| 13 |
+
0.13608524203300476,
|
| 14 |
+
-0.3551200032234192,
|
| 15 |
+
0.3016810417175293,
|
| 16 |
+
0.7096384167671204,
|
| 17 |
+
0.43915659189224243,
|
| 18 |
+
0.729606568813324,
|
| 19 |
+
0.0022646363358944654,
|
| 20 |
+
0.023206504061818123,
|
| 21 |
+
-0.022678690031170845,
|
| 22 |
+
0.004449035506695509,
|
| 23 |
+
-0.023373732343316078,
|
| 24 |
+
-0.0023263380862772465,
|
| 25 |
+
0.02248271182179451,
|
| 26 |
+
-0.5447278022766113,
|
| 27 |
+
-0.13919976353645325,
|
| 28 |
+
0.32043108344078064,
|
| 29 |
+
0.6696299910545349,
|
| 30 |
+
-0.47840023040771484,
|
| 31 |
+
-0.09356574714183807,
|
| 32 |
+
-0.13595955073833466,
|
| 33 |
+
-0.023428356274962425,
|
| 34 |
+
-0.0008574495441280305,
|
| 35 |
+
-0.0011587876360863447,
|
| 36 |
+
0.75,
|
| 37 |
+
0.0,
|
| 38 |
+
0.0,
|
| 39 |
+
0.0,
|
| 40 |
+
0.0
|
| 41 |
+
],
|
| 42 |
+
"std": [
|
| 43 |
+
0.06855416297912598,
|
| 44 |
+
0.04814520105719566,
|
| 45 |
+
0.0739685669541359,
|
| 46 |
+
0.08283186703920364,
|
| 47 |
+
0.07103259861469269,
|
| 48 |
+
0.06894790381193161,
|
| 49 |
+
0.07200707495212555,
|
| 50 |
+
0.22745577991008759,
|
| 51 |
+
0.0967465490102768,
|
| 52 |
+
0.2211156189441681,
|
| 53 |
+
0.19299180805683136,
|
| 54 |
+
0.4131201207637787,
|
| 55 |
+
0.27728959918022156,
|
| 56 |
+
0.47446826100349426,
|
| 57 |
+
0.003659198060631752,
|
| 58 |
+
0.049973152577877045,
|
| 59 |
+
0.048772457987070084,
|
| 60 |
+
0.00308406469412148,
|
| 61 |
+
0.050082605332136154,
|
| 62 |
+
0.008206783793866634,
|
| 63 |
+
0.04823305457830429,
|
| 64 |
+
0.3143916130065918,
|
| 65 |
+
0.10105322301387787,
|
| 66 |
+
0.24209849536418915,
|
| 67 |
+
0.42984139919281006,
|
| 68 |
+
0.29979273676872253,
|
| 69 |
+
0.1555895209312439,
|
| 70 |
+
0.1276027262210846,
|
| 71 |
+
0.011642614379525185,
|
| 72 |
+
0.02119765430688858,
|
| 73 |
+
0.010956699028611183,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0
|
| 79 |
+
],
|
| 80 |
+
"max": [
|
| 81 |
+
0.21513332426548004,
|
| 82 |
+
0.21692107617855072,
|
| 83 |
+
0.3652719259262085,
|
| 84 |
+
0.07139641791582108,
|
| 85 |
+
0.015001054853200912,
|
| 86 |
+
0.03918211907148361,
|
| 87 |
+
0.03575323149561882,
|
| 88 |
+
0.6107784509658813,
|
| 89 |
+
0.31583136320114136,
|
| 90 |
+
-0.00040738514508120716,
|
| 91 |
+
0.6836385726928711,
|
| 92 |
+
1.4285058975219727,
|
| 93 |
+
0.8524638414382935,
|
| 94 |
+
1.7429704666137695,
|
| 95 |
+
0.00735096400603652,
|
| 96 |
+
0.25089067220687866,
|
| 97 |
+
0.04510946571826935,
|
| 98 |
+
0.017853474244475365,
|
| 99 |
+
0.046191196888685226,
|
| 100 |
+
0.0031940839253365993,
|
| 101 |
+
0.24179035425186157,
|
| 102 |
+
0.0037467884831130505,
|
| 103 |
+
0.0002910589682869613,
|
| 104 |
+
0.7421935796737671,
|
| 105 |
+
1.6607650518417358,
|
| 106 |
+
-3.8668040360789746e-05,
|
| 107 |
+
0.3736472725868225,
|
| 108 |
+
0.30042290687561035,
|
| 109 |
+
-0.014337222091853619,
|
| 110 |
+
0.02500663883984089,
|
| 111 |
+
0.013617209158837795,
|
| 112 |
+
0.75,
|
| 113 |
+
0.0,
|
| 114 |
+
0.0,
|
| 115 |
+
0.0,
|
| 116 |
+
0.0
|
| 117 |
+
],
|
| 118 |
+
"min": [
|
| 119 |
+
-0.21088384091854095,
|
| 120 |
+
-0.11738907545804977,
|
| 121 |
+
-0.013445371761918068,
|
| 122 |
+
-0.5775371193885803,
|
| 123 |
+
-0.30140629410743713,
|
| 124 |
+
-0.3430681824684143,
|
| 125 |
+
-0.37358492612838745,
|
| 126 |
+
-0.007524379529058933,
|
| 127 |
+
-0.01704181358218193,
|
| 128 |
+
-0.6932834386825562,
|
| 129 |
+
0.0026604870799928904,
|
| 130 |
+
0.000401000608690083,
|
| 131 |
+
0.0007143893744796515,
|
| 132 |
+
0.0001968950527952984,
|
| 133 |
+
-0.022009270265698433,
|
| 134 |
+
-0.04607510566711426,
|
| 135 |
+
-0.24804681539535522,
|
| 136 |
+
-0.005683199502527714,
|
| 137 |
+
-0.25139108300209045,
|
| 138 |
+
-0.05519897863268852,
|
| 139 |
+
-0.04459292069077492,
|
| 140 |
+
-1.0918865203857422,
|
| 141 |
+
-0.3572312593460083,
|
| 142 |
+
-0.7850697636604309,
|
| 143 |
+
0.003401592606678605,
|
| 144 |
+
-1.4517900943756104,
|
| 145 |
+
-0.5649155378341675,
|
| 146 |
+
-0.4592915177345276,
|
| 147 |
+
-0.08972926437854767,
|
| 148 |
+
-0.1079544723033905,
|
| 149 |
+
-0.04311269149184227,
|
| 150 |
+
0.75,
|
| 151 |
+
0.0,
|
| 152 |
+
0.0,
|
| 153 |
+
0.0,
|
| 154 |
+
0.0
|
| 155 |
+
],
|
| 156 |
+
"q01": [
|
| 157 |
+
-0.1592998020350933,
|
| 158 |
+
-0.09867189824581146,
|
| 159 |
+
-0.00921511696651578,
|
| 160 |
+
-0.43521402031183243,
|
| 161 |
+
-0.2678038999438286,
|
| 162 |
+
-0.3131137639284134,
|
| 163 |
+
-0.34229206293821335,
|
| 164 |
+
-0.007516397396102548,
|
| 165 |
+
-0.003259584365878254,
|
| 166 |
+
-0.657462865114212,
|
| 167 |
+
0.002886058180592954,
|
| 168 |
+
0.0004246826865710318,
|
| 169 |
+
0.0023594271624460816,
|
| 170 |
+
0.0009427882905583829,
|
| 171 |
+
-0.015005301684141159,
|
| 172 |
+
-0.03586177062243223,
|
| 173 |
+
-0.2033357471227646,
|
| 174 |
+
-0.0044474758906289935,
|
| 175 |
+
-0.20831073820590973,
|
| 176 |
+
-0.03924582712352276,
|
| 177 |
+
-0.03469793684780598,
|
| 178 |
+
-1.0538361966609955,
|
| 179 |
+
-0.35215289890766144,
|
| 180 |
+
-0.0011892615584656596,
|
| 181 |
+
0.0034092417918145657,
|
| 182 |
+
-1.005431056022644,
|
| 183 |
+
-0.49986276030540466,
|
| 184 |
+
-0.4488404765725136,
|
| 185 |
+
-0.06848237849771976,
|
| 186 |
+
-0.0625098580494523,
|
| 187 |
+
-0.031260753981769085,
|
| 188 |
+
0.75,
|
| 189 |
+
0.0,
|
| 190 |
+
0.0,
|
| 191 |
+
0.0,
|
| 192 |
+
0.0
|
| 193 |
+
],
|
| 194 |
+
"q99": [
|
| 195 |
+
0.1750056967139244,
|
| 196 |
+
0.17476186528801918,
|
| 197 |
+
0.2931685149669647,
|
| 198 |
+
0.028990697581321,
|
| 199 |
+
0.012583295814692974,
|
| 200 |
+
0.013281819876283407,
|
| 201 |
+
0.009987043216824532,
|
| 202 |
+
0.6069400906562805,
|
| 203 |
+
0.31562769412994385,
|
| 204 |
+
-0.0004517402339843102,
|
| 205 |
+
0.6142808347940445,
|
| 206 |
+
1.2800610959529877,
|
| 207 |
+
0.848078653216362,
|
| 208 |
+
1.608409583568573,
|
| 209 |
+
0.005269425339065492,
|
| 210 |
+
0.20807136595249176,
|
| 211 |
+
0.03510108310729265,
|
| 212 |
+
0.016178835183382034,
|
| 213 |
+
0.03597608767449856,
|
| 214 |
+
0.0025836762506514788,
|
| 215 |
+
0.19978401437401772,
|
| 216 |
+
0.0036904277512803674,
|
| 217 |
+
0.00027790151216322556,
|
| 218 |
+
0.7203168272972107,
|
| 219 |
+
1.4561529159545898,
|
| 220 |
+
-3.885765272571007e-05,
|
| 221 |
+
0.22245876863598824,
|
| 222 |
+
0.11632611602544785,
|
| 223 |
+
-0.015507389791309834,
|
| 224 |
+
0.02313118800520897,
|
| 225 |
+
0.011822471395134926,
|
| 226 |
+
0.75,
|
| 227 |
+
0.0,
|
| 228 |
+
0.0,
|
| 229 |
+
0.0,
|
| 230 |
+
0.0
|
| 231 |
+
],
|
| 232 |
+
"mask": [
|
| 233 |
+
true,
|
| 234 |
+
true,
|
| 235 |
+
true,
|
| 236 |
+
true,
|
| 237 |
+
true,
|
| 238 |
+
true,
|
| 239 |
+
true,
|
| 240 |
+
true,
|
| 241 |
+
true,
|
| 242 |
+
true,
|
| 243 |
+
true,
|
| 244 |
+
true,
|
| 245 |
+
true,
|
| 246 |
+
true,
|
| 247 |
+
true,
|
| 248 |
+
true,
|
| 249 |
+
true,
|
| 250 |
+
true,
|
| 251 |
+
true,
|
| 252 |
+
true,
|
| 253 |
+
true,
|
| 254 |
+
true,
|
| 255 |
+
true,
|
| 256 |
+
true,
|
| 257 |
+
true,
|
| 258 |
+
true,
|
| 259 |
+
true,
|
| 260 |
+
true,
|
| 261 |
+
true,
|
| 262 |
+
true,
|
| 263 |
+
true,
|
| 264 |
+
true,
|
| 265 |
+
true,
|
| 266 |
+
true,
|
| 267 |
+
true,
|
| 268 |
+
true
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"state": {
|
| 272 |
+
"mean": [
|
| 273 |
+
-0.007949735037982464,
|
| 274 |
+
0.01838493160903454,
|
| 275 |
+
0.05705238878726959,
|
| 276 |
+
-0.04782641679048538,
|
| 277 |
+
-0.04985744506120682,
|
| 278 |
+
-0.043356288224458694,
|
| 279 |
+
-0.04469400271773338,
|
| 280 |
+
0.3088795244693756,
|
| 281 |
+
0.17249879240989685,
|
| 282 |
+
-0.34438005089759827,
|
| 283 |
+
0.27151158452033997,
|
| 284 |
+
0.6904981136322021,
|
| 285 |
+
0.42134150862693787,
|
| 286 |
+
0.7150870561599731,
|
| 287 |
+
0.006028186995536089,
|
| 288 |
+
0.023279791697859764,
|
| 289 |
+
-0.022360648959875107,
|
| 290 |
+
0.007919888943433762,
|
| 291 |
+
-0.023288242518901825,
|
| 292 |
+
-0.0010538079077377915,
|
| 293 |
+
0.02263154275715351,
|
| 294 |
+
-0.5395280122756958,
|
| 295 |
+
-0.13902321457862854,
|
| 296 |
+
0.32027995586395264,
|
| 297 |
+
0.6713051795959473,
|
| 298 |
+
-0.47778892517089844,
|
| 299 |
+
-0.0925600454211235,
|
| 300 |
+
-0.1357308328151703,
|
| 301 |
+
0.0,
|
| 302 |
+
-0.14878930151462555,
|
| 303 |
+
0.0,
|
| 304 |
+
0.75
|
| 305 |
+
],
|
| 306 |
+
"std": [
|
| 307 |
+
0.0685795322060585,
|
| 308 |
+
0.04785825312137604,
|
| 309 |
+
0.07355451583862305,
|
| 310 |
+
0.07865653932094574,
|
| 311 |
+
0.06974292546510696,
|
| 312 |
+
0.06802646815776825,
|
| 313 |
+
0.07132686674594879,
|
| 314 |
+
0.22695879638195038,
|
| 315 |
+
0.11018200218677521,
|
| 316 |
+
0.21475042402744293,
|
| 317 |
+
0.15972517430782318,
|
| 318 |
+
0.4049052894115448,
|
| 319 |
+
0.2650735676288605,
|
| 320 |
+
0.4734349250793457,
|
| 321 |
+
0.0036633212585002184,
|
| 322 |
+
0.049966294318437576,
|
| 323 |
+
0.04872097074985504,
|
| 324 |
+
0.0031518512405455112,
|
| 325 |
+
0.05005797743797302,
|
| 326 |
+
0.008245873264968395,
|
| 327 |
+
0.0483190156519413,
|
| 328 |
+
0.3140910267829895,
|
| 329 |
+
0.1009896919131279,
|
| 330 |
+
0.2423640936613083,
|
| 331 |
+
0.4292946755886078,
|
| 332 |
+
0.2999919056892395,
|
| 333 |
+
0.15540601313114166,
|
| 334 |
+
0.12780624628067017,
|
| 335 |
+
0.0,
|
| 336 |
+
0.013509782962501016,
|
| 337 |
+
0.0,
|
| 338 |
+
0.0
|
| 339 |
+
],
|
| 340 |
+
"max": [
|
| 341 |
+
0.2149999588727951,
|
| 342 |
+
0.21600016951560974,
|
| 343 |
+
0.36500000953674316,
|
| 344 |
+
0.0,
|
| 345 |
+
4.046002644031432e-09,
|
| 346 |
+
0.0,
|
| 347 |
+
1.0771045513835453e-11,
|
| 348 |
+
0.609000027179718,
|
| 349 |
+
0.3600001633167267,
|
| 350 |
+
0.0,
|
| 351 |
+
0.5249999761581421,
|
| 352 |
+
1.3730000257492065,
|
| 353 |
+
0.8119999766349792,
|
| 354 |
+
1.7453292608261108,
|
| 355 |
+
0.014000000432133675,
|
| 356 |
+
0.2500004470348358,
|
| 357 |
+
0.04610275477170944,
|
| 358 |
+
0.020999999716877937,
|
| 359 |
+
0.04600704088807106,
|
| 360 |
+
0.006000000052154064,
|
| 361 |
+
0.24200008809566498,
|
| 362 |
+
0.00800000037997961,
|
| 363 |
+
0.0010000000474974513,
|
| 364 |
+
0.7419999837875366,
|
| 365 |
+
1.6640000343322754,
|
| 366 |
+
6.573184663238862e-09,
|
| 367 |
+
0.37400001287460327,
|
| 368 |
+
0.300999253988266,
|
| 369 |
+
0.0,
|
| 370 |
+
0.0,
|
| 371 |
+
0.0,
|
| 372 |
+
0.75
|
| 373 |
+
],
|
| 374 |
+
"min": [
|
| 375 |
+
-0.210999995470047,
|
| 376 |
+
-0.11699992418289185,
|
| 377 |
+
-1.1206404693098193e-08,
|
| 378 |
+
-0.5680000185966492,
|
| 379 |
+
-0.29699981212615967,
|
| 380 |
+
-0.3440000116825104,
|
| 381 |
+
-0.37400001287460327,
|
| 382 |
+
-0.00800000037997961,
|
| 383 |
+
-0.004000000189989805,
|
| 384 |
+
-0.6610000133514404,
|
| 385 |
+
0.003000000026077032,
|
| 386 |
+
0.0,
|
| 387 |
+
0.0010000000474974513,
|
| 388 |
+
0.0,
|
| 389 |
+
-0.017999978736042976,
|
| 390 |
+
-0.04700015112757683,
|
| 391 |
+
-0.24899962544441223,
|
| 392 |
+
-0.0020000000949949026,
|
| 393 |
+
-0.2510001063346863,
|
| 394 |
+
-0.053999971598386765,
|
| 395 |
+
-0.045001156628131866,
|
| 396 |
+
-1.0800000429153442,
|
| 397 |
+
-0.3590024709701538,
|
| 398 |
+
-0.781000018119812,
|
| 399 |
+
0.004999999888241291,
|
| 400 |
+
-1.4559999704360962,
|
| 401 |
+
-0.5649999976158142,
|
| 402 |
+
-0.46299999952316284,
|
| 403 |
+
0.0,
|
| 404 |
+
-0.15000000596046448,
|
| 405 |
+
0.0,
|
| 406 |
+
0.75
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
-0.1589999943971634,
|
| 410 |
+
-0.09774992614984512,
|
| 411 |
+
0.0,
|
| 412 |
+
-0.41325000673532486,
|
| 413 |
+
-0.2677498906850815,
|
| 414 |
+
-0.3124999925494194,
|
| 415 |
+
-0.34200000762939453,
|
| 416 |
+
-0.00800000037997961,
|
| 417 |
+
-0.003000000026077032,
|
| 418 |
+
-0.6380000114440918,
|
| 419 |
+
0.003000000026077032,
|
| 420 |
+
0.0,
|
| 421 |
+
0.0020000000949949026,
|
| 422 |
+
0.0010000000474974513,
|
| 423 |
+
-0.009999881265684962,
|
| 424 |
+
-0.035998325794935226,
|
| 425 |
+
-0.197999507188797,
|
| 426 |
+
-0.0010000000474974513,
|
| 427 |
+
-0.20800021663308144,
|
| 428 |
+
-0.03874997515231371,
|
| 429 |
+
-0.03500000014901161,
|
| 430 |
+
-1.0429999828338623,
|
| 431 |
+
-0.35000214725732803,
|
| 432 |
+
-0.0007500000356230885,
|
| 433 |
+
0.007000000216066837,
|
| 434 |
+
-1.0049999952316284,
|
| 435 |
+
-0.49900001287460327,
|
| 436 |
+
-0.45100000500679016,
|
| 437 |
+
0.0,
|
| 438 |
+
-0.15000000596046448,
|
| 439 |
+
0.0,
|
| 440 |
+
0.75
|
| 441 |
+
],
|
| 442 |
+
"q99": [
|
| 443 |
+
0.17499994486570358,
|
| 444 |
+
0.1735001765191555,
|
| 445 |
+
0.29300010204315186,
|
| 446 |
+
0.0,
|
| 447 |
+
6.082890094405435e-11,
|
| 448 |
+
0.0,
|
| 449 |
+
1.7963793159155722e-12,
|
| 450 |
+
0.6050000190734863,
|
| 451 |
+
0.3409999907016754,
|
| 452 |
+
0.0,
|
| 453 |
+
0.4869999885559082,
|
| 454 |
+
1.2330000400543213,
|
| 455 |
+
0.800000011920929,
|
| 456 |
+
1.604750007390976,
|
| 457 |
+
0.010000981157645583,
|
| 458 |
+
0.20900072157382965,
|
| 459 |
+
0.03500000014901161,
|
| 460 |
+
0.019749989733099937,
|
| 461 |
+
0.035999998450279236,
|
| 462 |
+
0.0040117253083735704,
|
| 463 |
+
0.20200001820921898,
|
| 464 |
+
0.00800000037997961,
|
| 465 |
+
0.0010000000474974513,
|
| 466 |
+
0.718999981880188,
|
| 467 |
+
1.4559999704360962,
|
| 468 |
+
0.0,
|
| 469 |
+
0.22300000488758087,
|
| 470 |
+
0.11599990725517273,
|
| 471 |
+
0.0,
|
| 472 |
+
-0.15000000596046448,
|
| 473 |
+
0.0,
|
| 474 |
+
0.75
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"num_transitions": 12226,
|
| 478 |
+
"num_trajectories": 100
|
| 479 |
+
}
|
| 480 |
+
}
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:298aa06d1ce6c0a726d1563e32e911921ae8a1b2c25d8e2704033b067309b79d
|
| 3 |
+
size 8604557774
|
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/summary.jsonl
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 10000}
|
| 2 |
+
{"steps": 20000}
|
| 3 |
+
{"steps": 30000}
|
| 4 |
+
{"steps": 40000}
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_10000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c67da627690ea593698cdef6c9cfcb9ac9d4fc2ead212ce5e8ab32207c5f3ac8
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_20000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11a0d31fc3b3e169e9203bc2ab8f6dedc4f6d99f1fd2e72df262c40f91860a54
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_30000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8de0fe4f75203f1cb526a77956c73d2b8e2b0555c7c5adf8274f4110722269d9
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_40000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53cec1943ecb2f59b8872a892092652c3a1a44b7f5d37f9652748f040c064266
|
| 3 |
+
size 8604575530
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.json
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "G1WholebodyXMovePick",
|
| 3 |
+
"run_root_dir": "runs/InternVLA/Checkpoints",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "jliu530-soochow-university",
|
| 10 |
+
"wandb_project": "psi",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"framework_py": "InternVLA-M1",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048
|
| 18 |
+
},
|
| 19 |
+
"dino": {
|
| 20 |
+
"dino_backbone": "dinov2_vits14"
|
| 21 |
+
},
|
| 22 |
+
"layer_qformer": {
|
| 23 |
+
"qformer_end_layer": 37,
|
| 24 |
+
"qformer_start_layer": 36,
|
| 25 |
+
"num_query_tokens": 64,
|
| 26 |
+
"input_dim": 2048,
|
| 27 |
+
"ouptput_dim": 768,
|
| 28 |
+
"grad_scale": 0.5
|
| 29 |
+
},
|
| 30 |
+
"action_model": {
|
| 31 |
+
"action_model_type": "DiT-B",
|
| 32 |
+
"action_hidden_dim": 768,
|
| 33 |
+
"action_dim": 36,
|
| 34 |
+
"use_ema": false,
|
| 35 |
+
"future_action_window_size": 15,
|
| 36 |
+
"past_action_window_size": 0,
|
| 37 |
+
"repeated_diffusion_steps": 8
|
| 38 |
+
},
|
| 39 |
+
"fm_head_config": {
|
| 40 |
+
"input_embedding_dim": 1536,
|
| 41 |
+
"hidden_size": 1024,
|
| 42 |
+
"add_pos_embed": true,
|
| 43 |
+
"max_seq_len": 1024,
|
| 44 |
+
"action_dim": 36,
|
| 45 |
+
"future_action_window_size": 15,
|
| 46 |
+
"action_horizon": 16,
|
| 47 |
+
"past_action_window_size": 0,
|
| 48 |
+
"noise_beta_alpha": 1.5,
|
| 49 |
+
"noise_beta_beta": 1.0,
|
| 50 |
+
"noise_s": 0.999,
|
| 51 |
+
"num_timestep_buckets": 1000,
|
| 52 |
+
"num_inference_timesteps": 4,
|
| 53 |
+
"num_target_vision_tokens": 32,
|
| 54 |
+
"diffusion_model_cfg": {
|
| 55 |
+
"attention_head_dim": 48,
|
| 56 |
+
"cross_attention_dim": 2048,
|
| 57 |
+
"dropout": 0.2,
|
| 58 |
+
"final_dropout": true,
|
| 59 |
+
"interleave_self_attention": true,
|
| 60 |
+
"norm_type": "ada_norm",
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_layers": 16,
|
| 63 |
+
"output_dim": 1024,
|
| 64 |
+
"positional_embeddings": null
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"datasets": {
|
| 69 |
+
"vlm_data": {
|
| 70 |
+
"dataset_py": "vlm_datasets",
|
| 71 |
+
"dataformat": "llava_json",
|
| 72 |
+
"dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
|
| 73 |
+
"eval_dataset": "aokvqa_cauldron_llava_format",
|
| 74 |
+
"data_flatten": false,
|
| 75 |
+
"base_interval": 2,
|
| 76 |
+
"max_pixels": 50176,
|
| 77 |
+
"min_pixels": 784,
|
| 78 |
+
"model_max_length": 2048,
|
| 79 |
+
"model_type": "qwen2.5vl",
|
| 80 |
+
"per_device_batch_size": 4
|
| 81 |
+
},
|
| 82 |
+
"vla_data": {
|
| 83 |
+
"dataset_py": "lerobot_datasets",
|
| 84 |
+
"data_root_dir": "/hfm/jliu/simple/G1WholebodyXMovePick-v0",
|
| 85 |
+
"data_mix": "humanoid_",
|
| 86 |
+
"action_type": "abs_joints",
|
| 87 |
+
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
|
| 88 |
+
"CoT_answer": "bbox",
|
| 89 |
+
"default_image_resolution": [
|
| 90 |
+
3,
|
| 91 |
+
224,
|
| 92 |
+
224
|
| 93 |
+
],
|
| 94 |
+
"per_device_batch_size": 64,
|
| 95 |
+
"preload_all": true,
|
| 96 |
+
"load_all_data_for_training": true,
|
| 97 |
+
"obs": [
|
| 98 |
+
"image_0"
|
| 99 |
+
],
|
| 100 |
+
"image_size": [
|
| 101 |
+
224,
|
| 102 |
+
224
|
| 103 |
+
]
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
"trainer": {
|
| 107 |
+
"epochs": 100,
|
| 108 |
+
"max_train_steps": 40000,
|
| 109 |
+
"num_warmup_steps": 0,
|
| 110 |
+
"save_interval": 10000,
|
| 111 |
+
"eval_interval": 100,
|
| 112 |
+
"learning_rate": {
|
| 113 |
+
"base": 5e-05,
|
| 114 |
+
"qwen_vl_interface": 1e-05,
|
| 115 |
+
"action_model": 0.0001
|
| 116 |
+
},
|
| 117 |
+
"lr_scheduler_type": "cosine_with_min_lr",
|
| 118 |
+
"scheduler_specific_kwargs": {
|
| 119 |
+
"min_lr": 5e-07
|
| 120 |
+
},
|
| 121 |
+
"freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
|
| 122 |
+
"loss_scale": {
|
| 123 |
+
"vla": 1.0,
|
| 124 |
+
"vlm": 0.1
|
| 125 |
+
},
|
| 126 |
+
"pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
|
| 127 |
+
"skip_reload_modules": "action_model",
|
| 128 |
+
"repeated_diffusion_steps": 4,
|
| 129 |
+
"max_grad_norm": 1.0,
|
| 130 |
+
"warmup_ratio": 0.1,
|
| 131 |
+
"weight_decay": 0.0,
|
| 132 |
+
"logging_frequency": 10,
|
| 133 |
+
"gradient_clipping": 1.0,
|
| 134 |
+
"gradient_accumulation_steps": 1,
|
| 135 |
+
"optimizer": {
|
| 136 |
+
"name": "AdamW",
|
| 137 |
+
"betas": [
|
| 138 |
+
0.9,
|
| 139 |
+
0.95
|
| 140 |
+
],
|
| 141 |
+
"eps": 1e-08,
|
| 142 |
+
"weight_decay": 1e-08
|
| 143 |
+
},
|
| 144 |
+
"is_resume": false,
|
| 145 |
+
"resume_epoch": null,
|
| 146 |
+
"resume_step": null,
|
| 147 |
+
"enable_gradient_checkpointing": true,
|
| 148 |
+
"enable_mixed_precision_training": true
|
| 149 |
+
},
|
| 150 |
+
"output_dir": "runs/InternVLA/Checkpoints/G1WholebodyXMovePick/20260404_061741"
|
| 151 |
+
}
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.yaml
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: G1WholebodyXMovePick
|
| 2 |
+
run_root_dir: runs/InternVLA/Checkpoints
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: jliu530-soochow-university
|
| 8 |
+
wandb_project: psi
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
framework_py: InternVLA-M1
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
dino:
|
| 17 |
+
dino_backbone: dinov2_vits14
|
| 18 |
+
layer_qformer:
|
| 19 |
+
qformer_end_layer: 37
|
| 20 |
+
qformer_start_layer: 36
|
| 21 |
+
num_query_tokens: 64
|
| 22 |
+
input_dim: 2048
|
| 23 |
+
ouptput_dim: 768
|
| 24 |
+
grad_scale: 0.5
|
| 25 |
+
action_model:
|
| 26 |
+
action_model_type: DiT-B
|
| 27 |
+
action_hidden_dim: 768
|
| 28 |
+
action_dim: 36
|
| 29 |
+
use_ema: false
|
| 30 |
+
future_action_window_size: 15
|
| 31 |
+
past_action_window_size: 0
|
| 32 |
+
repeated_diffusion_steps: 8
|
| 33 |
+
fm_head_config:
|
| 34 |
+
input_embedding_dim: 1536
|
| 35 |
+
hidden_size: 1024
|
| 36 |
+
add_pos_embed: true
|
| 37 |
+
max_seq_len: 1024
|
| 38 |
+
action_dim: 36
|
| 39 |
+
future_action_window_size: 15
|
| 40 |
+
action_horizon: 16
|
| 41 |
+
past_action_window_size: 0
|
| 42 |
+
noise_beta_alpha: 1.5
|
| 43 |
+
noise_beta_beta: 1.0
|
| 44 |
+
noise_s: 0.999
|
| 45 |
+
num_timestep_buckets: 1000
|
| 46 |
+
num_inference_timesteps: 4
|
| 47 |
+
num_target_vision_tokens: 32
|
| 48 |
+
diffusion_model_cfg:
|
| 49 |
+
attention_head_dim: 48
|
| 50 |
+
cross_attention_dim: 2048
|
| 51 |
+
dropout: 0.2
|
| 52 |
+
final_dropout: true
|
| 53 |
+
interleave_self_attention: true
|
| 54 |
+
norm_type: ada_norm
|
| 55 |
+
num_attention_heads: 32
|
| 56 |
+
num_layers: 16
|
| 57 |
+
output_dim: 1024
|
| 58 |
+
positional_embeddings: null
|
| 59 |
+
datasets:
|
| 60 |
+
vlm_data:
|
| 61 |
+
dataset_py: vlm_datasets
|
| 62 |
+
dataformat: llava_json
|
| 63 |
+
dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
|
| 64 |
+
eval_dataset: aokvqa_cauldron_llava_format
|
| 65 |
+
data_flatten: false
|
| 66 |
+
base_interval: 2
|
| 67 |
+
max_pixels: 50176
|
| 68 |
+
min_pixels: 784
|
| 69 |
+
model_max_length: 2048
|
| 70 |
+
model_type: qwen2.5vl
|
| 71 |
+
per_device_batch_size: 4
|
| 72 |
+
vla_data:
|
| 73 |
+
dataset_py: lerobot_datasets
|
| 74 |
+
data_root_dir: /hfm/jliu/simple/G1WholebodyXMovePick-v0
|
| 75 |
+
data_mix: humanoid_
|
| 76 |
+
action_type: abs_joints
|
| 77 |
+
CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
|
| 78 |
+
Locate their bounding boxes in [x1,y1,x2,y2] format.
|
| 79 |
+
CoT_answer: bbox
|
| 80 |
+
default_image_resolution:
|
| 81 |
+
- 3
|
| 82 |
+
- 224
|
| 83 |
+
- 224
|
| 84 |
+
per_device_batch_size: 64
|
| 85 |
+
preload_all: true
|
| 86 |
+
load_all_data_for_training: true
|
| 87 |
+
obs:
|
| 88 |
+
- image_0
|
| 89 |
+
image_size:
|
| 90 |
+
- 224
|
| 91 |
+
- 224
|
| 92 |
+
trainer:
|
| 93 |
+
epochs: 100
|
| 94 |
+
max_train_steps: 40000
|
| 95 |
+
num_warmup_steps: 0
|
| 96 |
+
save_interval: 10000
|
| 97 |
+
eval_interval: 100
|
| 98 |
+
learning_rate:
|
| 99 |
+
base: 5.0e-05
|
| 100 |
+
qwen_vl_interface: 1.0e-05
|
| 101 |
+
action_model: 0.0001
|
| 102 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 103 |
+
scheduler_specific_kwargs:
|
| 104 |
+
min_lr: 5.0e-07
|
| 105 |
+
freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
|
| 106 |
+
loss_scale:
|
| 107 |
+
vla: 1.0
|
| 108 |
+
vlm: 0.1
|
| 109 |
+
pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
|
| 110 |
+
skip_reload_modules: action_model
|
| 111 |
+
repeated_diffusion_steps: 4
|
| 112 |
+
max_grad_norm: 1.0
|
| 113 |
+
warmup_ratio: 0.1
|
| 114 |
+
weight_decay: 0.0
|
| 115 |
+
logging_frequency: 10
|
| 116 |
+
gradient_clipping: 1.0
|
| 117 |
+
gradient_accumulation_steps: 1
|
| 118 |
+
optimizer:
|
| 119 |
+
name: AdamW
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.95
|
| 123 |
+
eps: 1.0e-08
|
| 124 |
+
weight_decay: 1.0e-08
|
| 125 |
+
is_resume: false
|
| 126 |
+
resume_epoch: null
|
| 127 |
+
resume_step: null
|
| 128 |
+
enable_gradient_checkpointing: true
|
| 129 |
+
enable_mixed_precision_training: true
|
| 130 |
+
output_dir: runs/InternVLA/Checkpoints/G1WholebodyXMovePick/20260404_061741
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/dataset_statistics.json
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"new_embodiment": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.0,
|
| 6 |
+
0.0,
|
| 7 |
+
0.0,
|
| 8 |
+
0.0,
|
| 9 |
+
0.0,
|
| 10 |
+
0.0,
|
| 11 |
+
0.0,
|
| 12 |
+
-0.08290933817625046,
|
| 13 |
+
-0.1160692349076271,
|
| 14 |
+
-0.1160692349076271,
|
| 15 |
+
0.24872823059558868,
|
| 16 |
+
0.24872823059558868,
|
| 17 |
+
0.09948870539665222,
|
| 18 |
+
0.24872823059558868,
|
| 19 |
+
-0.0202432032674551,
|
| 20 |
+
0.19145731627941132,
|
| 21 |
+
0.015744829550385475,
|
| 22 |
+
0.19732625782489777,
|
| 23 |
+
-0.1048167273402214,
|
| 24 |
+
-0.1180705726146698,
|
| 25 |
+
0.0800875574350357,
|
| 26 |
+
-0.023737892508506775,
|
| 27 |
+
-0.1952551156282425,
|
| 28 |
+
-0.07107722014188766,
|
| 29 |
+
0.18714968860149384,
|
| 30 |
+
0.07278095185756683,
|
| 31 |
+
-0.1969067007303238,
|
| 32 |
+
0.13247987627983093,
|
| 33 |
+
-0.003939848393201828,
|
| 34 |
+
0.08321992307901382,
|
| 35 |
+
-0.012255122885107994,
|
| 36 |
+
0.7401692867279053,
|
| 37 |
+
0.07785128802061081,
|
| 38 |
+
0.0,
|
| 39 |
+
-0.004404395818710327,
|
| 40 |
+
0.0
|
| 41 |
+
],
|
| 42 |
+
"std": [
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0,
|
| 48 |
+
0.0,
|
| 49 |
+
0.0,
|
| 50 |
+
0.18071416020393372,
|
| 51 |
+
0.2529962658882141,
|
| 52 |
+
0.2529962658882141,
|
| 53 |
+
0.5421715378761292,
|
| 54 |
+
0.5421715378761292,
|
| 55 |
+
0.216846764087677,
|
| 56 |
+
0.5421715378761292,
|
| 57 |
+
0.06216248497366905,
|
| 58 |
+
0.005301938857883031,
|
| 59 |
+
0.141453355550766,
|
| 60 |
+
0.15665654838085175,
|
| 61 |
+
0.05677799880504608,
|
| 62 |
+
0.11932738125324249,
|
| 63 |
+
0.09939250349998474,
|
| 64 |
+
0.07487308979034424,
|
| 65 |
+
0.01068197004497035,
|
| 66 |
+
0.1846880465745926,
|
| 67 |
+
0.16723404824733734,
|
| 68 |
+
0.1179094985127449,
|
| 69 |
+
0.17774474620819092,
|
| 70 |
+
0.21004262566566467,
|
| 71 |
+
0.031044654548168182,
|
| 72 |
+
0.04217095300555229,
|
| 73 |
+
0.044379811733961105,
|
| 74 |
+
0.00016928205135004158,
|
| 75 |
+
0.17665114998817444,
|
| 76 |
+
0.0,
|
| 77 |
+
0.03681277483701706,
|
| 78 |
+
0.0
|
| 79 |
+
],
|
| 80 |
+
"max": [
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0,
|
| 86 |
+
0.0,
|
| 87 |
+
0.0,
|
| 88 |
+
3.40585956201392e-19,
|
| 89 |
+
4.519914941300404e-19,
|
| 90 |
+
4.519914941300404e-19,
|
| 91 |
+
1.5,
|
| 92 |
+
1.5,
|
| 93 |
+
0.6000000238418579,
|
| 94 |
+
1.5,
|
| 95 |
+
0.23390473425388336,
|
| 96 |
+
0.6660271883010864,
|
| 97 |
+
0.7131545543670654,
|
| 98 |
+
0.6127181053161621,
|
| 99 |
+
0.12901827692985535,
|
| 100 |
+
0.23185482621192932,
|
| 101 |
+
0.3763704001903534,
|
| 102 |
+
0.2929672598838806,
|
| 103 |
+
-0.1900009959936142,
|
| 104 |
+
0.3377506732940674,
|
| 105 |
+
0.585555911064148,
|
| 106 |
+
0.7287320494651794,
|
| 107 |
+
0.2222127765417099,
|
| 108 |
+
0.8800510764122009,
|
| 109 |
+
0.14049682021141052,
|
| 110 |
+
0.20396868884563446,
|
| 111 |
+
0.30819037556648254,
|
| 112 |
+
0.7400000095367432,
|
| 113 |
+
0.5,
|
| 114 |
+
0.0,
|
| 115 |
+
0.18601855635643005,
|
| 116 |
+
0.0
|
| 117 |
+
],
|
| 118 |
+
"min": [
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0,
|
| 122 |
+
0.0,
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
-0.5,
|
| 127 |
+
-0.699999988079071,
|
| 128 |
+
-0.699999988079071,
|
| 129 |
+
-8.807794347046262e-19,
|
| 130 |
+
-8.807794347046262e-19,
|
| 131 |
+
-3.4037360342851314e-19,
|
| 132 |
+
-8.807794347046262e-19,
|
| 133 |
+
-0.7234289646148682,
|
| 134 |
+
0.1900009959936142,
|
| 135 |
+
-0.3478638231754303,
|
| 136 |
+
-0.24381141364574432,
|
| 137 |
+
-0.2871176600456238,
|
| 138 |
+
-0.46943801641464233,
|
| 139 |
+
-0.28036442399024963,
|
| 140 |
+
-0.28476205468177795,
|
| 141 |
+
-0.2781273424625397,
|
| 142 |
+
-0.6339197754859924,
|
| 143 |
+
-0.2889905273914337,
|
| 144 |
+
-0.2651921808719635,
|
| 145 |
+
-0.6309908628463745,
|
| 146 |
+
-0.19380050897598267,
|
| 147 |
+
-0.12375029176473618,
|
| 148 |
+
0.004558231681585312,
|
| 149 |
+
-0.299873948097229,
|
| 150 |
+
0.7400000095367432,
|
| 151 |
+
-5.537859719245743e-17,
|
| 152 |
+
0.0,
|
| 153 |
+
-0.19462604820728302,
|
| 154 |
+
0.0
|
| 155 |
+
],
|
| 156 |
+
"q01": [
|
| 157 |
+
0.0,
|
| 158 |
+
0.0,
|
| 159 |
+
0.0,
|
| 160 |
+
0.0,
|
| 161 |
+
0.0,
|
| 162 |
+
0.0,
|
| 163 |
+
0.0,
|
| 164 |
+
-0.5,
|
| 165 |
+
-0.699999988079071,
|
| 166 |
+
-0.699999988079071,
|
| 167 |
+
0.0,
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0,
|
| 171 |
+
-0.20426521703600883,
|
| 172 |
+
0.1900009959936142,
|
| 173 |
+
-0.259531612098217,
|
| 174 |
+
-0.07438816666603089,
|
| 175 |
+
-0.25566656738519666,
|
| 176 |
+
-0.36685342639684676,
|
| 177 |
+
-0.20514860033988952,
|
| 178 |
+
-0.19272020012140273,
|
| 179 |
+
-0.24566123276948928,
|
| 180 |
+
-0.5219609200954437,
|
| 181 |
+
-0.10887585021555424,
|
| 182 |
+
-0.17436543881893157,
|
| 183 |
+
-0.5553655999898911,
|
| 184 |
+
-0.16044148862361907,
|
| 185 |
+
-0.08815551772713662,
|
| 186 |
+
0.022177401781082153,
|
| 187 |
+
-0.1136455524712801,
|
| 188 |
+
0.7400000095367432,
|
| 189 |
+
0.0,
|
| 190 |
+
0.0,
|
| 191 |
+
-0.10952737107872963,
|
| 192 |
+
0.0
|
| 193 |
+
],
|
| 194 |
+
"q99": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0,
|
| 205 |
+
1.5,
|
| 206 |
+
1.5,
|
| 207 |
+
0.6000000238418579,
|
| 208 |
+
1.5,
|
| 209 |
+
0.15601892367005352,
|
| 210 |
+
0.19990646034479143,
|
| 211 |
+
0.40586651742458346,
|
| 212 |
+
0.5169547837972646,
|
| 213 |
+
0.01386699410155416,
|
| 214 |
+
0.13033131986856472,
|
| 215 |
+
0.2422071608901024,
|
| 216 |
+
0.18224790632724763,
|
| 217 |
+
-0.1900009959936142,
|
| 218 |
+
0.23839011460542678,
|
| 219 |
+
0.48448209166526796,
|
| 220 |
+
0.3644275778532031,
|
| 221 |
+
0.08767572224140227,
|
| 222 |
+
0.6856733673810961,
|
| 223 |
+
0.09890407033264646,
|
| 224 |
+
0.16878983661532404,
|
| 225 |
+
0.11360939003527204,
|
| 226 |
+
0.7400000095367432,
|
| 227 |
+
0.5,
|
| 228 |
+
0.0,
|
| 229 |
+
0.09405761912465121,
|
| 230 |
+
0.0
|
| 231 |
+
],
|
| 232 |
+
"mask": [
|
| 233 |
+
true,
|
| 234 |
+
true,
|
| 235 |
+
true,
|
| 236 |
+
true,
|
| 237 |
+
true,
|
| 238 |
+
true,
|
| 239 |
+
true,
|
| 240 |
+
true,
|
| 241 |
+
true,
|
| 242 |
+
true,
|
| 243 |
+
true,
|
| 244 |
+
true,
|
| 245 |
+
true,
|
| 246 |
+
true,
|
| 247 |
+
true,
|
| 248 |
+
true,
|
| 249 |
+
true,
|
| 250 |
+
true,
|
| 251 |
+
true,
|
| 252 |
+
true,
|
| 253 |
+
true,
|
| 254 |
+
true,
|
| 255 |
+
true,
|
| 256 |
+
true,
|
| 257 |
+
true,
|
| 258 |
+
true,
|
| 259 |
+
true,
|
| 260 |
+
true,
|
| 261 |
+
true,
|
| 262 |
+
true,
|
| 263 |
+
true,
|
| 264 |
+
true,
|
| 265 |
+
true,
|
| 266 |
+
true,
|
| 267 |
+
true,
|
| 268 |
+
true
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"state": {
|
| 272 |
+
"mean": [
|
| 273 |
+
1.4836045920674223e-06,
|
| 274 |
+
1.0606432624626905e-05,
|
| 275 |
+
-5.854836331309343e-07,
|
| 276 |
+
2.950614089058945e-06,
|
| 277 |
+
3.582050567274564e-07,
|
| 278 |
+
9.370121915708296e-06,
|
| 279 |
+
1.3081314591545379e-06,
|
| 280 |
+
-0.060894329100847244,
|
| 281 |
+
-0.025927798822522163,
|
| 282 |
+
-0.07437846064567566,
|
| 283 |
+
0.045159339904785156,
|
| 284 |
+
0.12926453351974487,
|
| 285 |
+
0.08669889718294144,
|
| 286 |
+
0.12420654296875,
|
| 287 |
+
0.00843458529561758,
|
| 288 |
+
0.17842410504817963,
|
| 289 |
+
0.0010605790885165334,
|
| 290 |
+
0.2700137794017792,
|
| 291 |
+
-0.09782520681619644,
|
| 292 |
+
-0.05658171325922012,
|
| 293 |
+
0.07219633460044861,
|
| 294 |
+
0.006632798817008734,
|
| 295 |
+
-0.1822047382593155,
|
| 296 |
+
-0.057714466005563736,
|
| 297 |
+
0.26347753405570984,
|
| 298 |
+
0.05908522754907608,
|
| 299 |
+
-0.13447149097919464,
|
| 300 |
+
0.1320032924413681,
|
| 301 |
+
-0.004364927764981985,
|
| 302 |
+
0.12081972509622574,
|
| 303 |
+
-0.01263909600675106,
|
| 304 |
+
0.7401692867279053
|
| 305 |
+
],
|
| 306 |
+
"std": [
|
| 307 |
+
3.169461990637501e-07,
|
| 308 |
+
0.00022020423784852028,
|
| 309 |
+
4.793582775164396e-07,
|
| 310 |
+
1.5518047803197987e-05,
|
| 311 |
+
9.997207826017984e-07,
|
| 312 |
+
1.3545039109885693e-05,
|
| 313 |
+
8.281783721031388e-07,
|
| 314 |
+
0.13058346509933472,
|
| 315 |
+
0.0469207838177681,
|
| 316 |
+
0.15300248563289642,
|
| 317 |
+
0.10863375663757324,
|
| 318 |
+
0.27733975648880005,
|
| 319 |
+
0.15592682361602783,
|
| 320 |
+
0.261216938495636,
|
| 321 |
+
0.058701254427433014,
|
| 322 |
+
0.008257574401795812,
|
| 323 |
+
0.14031915366649628,
|
| 324 |
+
0.1548788845539093,
|
| 325 |
+
0.05737532675266266,
|
| 326 |
+
0.12003903090953827,
|
| 327 |
+
0.09705275297164917,
|
| 328 |
+
0.06928782165050507,
|
| 329 |
+
0.011870346963405609,
|
| 330 |
+
0.1819697618484497,
|
| 331 |
+
0.16280630230903625,
|
| 332 |
+
0.11444099247455597,
|
| 333 |
+
0.17878052592277527,
|
| 334 |
+
0.2077128142118454,
|
| 335 |
+
0.029482470825314522,
|
| 336 |
+
0.0430903285741806,
|
| 337 |
+
0.04431401938199997,
|
| 338 |
+
0.00016928205135004158
|
| 339 |
+
],
|
| 340 |
+
"max": [
|
| 341 |
+
3.826543888862943e-06,
|
| 342 |
+
0.003359275171533227,
|
| 343 |
+
2.544531525927596e-06,
|
| 344 |
+
0.00021755567286163568,
|
| 345 |
+
3.435341568547301e-06,
|
| 346 |
+
0.00019837530271615833,
|
| 347 |
+
5.035403773945291e-06,
|
| 348 |
+
7.077142640810052e-07,
|
| 349 |
+
0.16438177227973938,
|
| 350 |
+
6.031086172697542e-07,
|
| 351 |
+
0.47953173518180847,
|
| 352 |
+
1.3467339277267456,
|
| 353 |
+
0.7186622619628906,
|
| 354 |
+
1.1524261236190796,
|
| 355 |
+
0.2516690492630005,
|
| 356 |
+
0.39732399582862854,
|
| 357 |
+
0.6492785811424255,
|
| 358 |
+
0.6760621070861816,
|
| 359 |
+
0.1240595132112503,
|
| 360 |
+
0.22742627561092377,
|
| 361 |
+
0.3518524467945099,
|
| 362 |
+
0.31155094504356384,
|
| 363 |
+
-0.15275707840919495,
|
| 364 |
+
0.3515203297138214,
|
| 365 |
+
0.643481969833374,
|
| 366 |
+
0.6658613681793213,
|
| 367 |
+
0.2018662989139557,
|
| 368 |
+
0.8597758412361145,
|
| 369 |
+
0.13490912318229675,
|
| 370 |
+
0.25929149985313416,
|
| 371 |
+
0.31226828694343567,
|
| 372 |
+
0.7400000095367432
|
| 373 |
+
],
|
| 374 |
+
"min": [
|
| 375 |
+
7.777451855872641e-07,
|
| 376 |
+
-0.00033337774220854044,
|
| 377 |
+
-5.059851446276298e-06,
|
| 378 |
+
-0.00017079990357160568,
|
| 379 |
+
-9.60247780312784e-06,
|
| 380 |
+
-0.00018899694259744138,
|
| 381 |
+
-7.104898486431921e-06,
|
| 382 |
+
-0.5265660881996155,
|
| 383 |
+
-0.35556095838546753,
|
| 384 |
+
-0.5981534123420715,
|
| 385 |
+
-0.00021067277702968568,
|
| 386 |
+
-8.959311230682943e-07,
|
| 387 |
+
-8.316740604641382e-06,
|
| 388 |
+
-1.4921358797437279e-06,
|
| 389 |
+
-0.3891601860523224,
|
| 390 |
+
0.150077685713768,
|
| 391 |
+
-0.3817761540412903,
|
| 392 |
+
-0.15058015286922455,
|
| 393 |
+
-0.2737894654273987,
|
| 394 |
+
-0.4105795621871948,
|
| 395 |
+
-0.2672588527202606,
|
| 396 |
+
-0.21800069510936737,
|
| 397 |
+
-0.26531729102134705,
|
| 398 |
+
-0.6129422783851624,
|
| 399 |
+
-0.2084185779094696,
|
| 400 |
+
-0.25410670042037964,
|
| 401 |
+
-0.5689104199409485,
|
| 402 |
+
-0.1778532713651657,
|
| 403 |
+
-0.09933578222990036,
|
| 404 |
+
0.03510970249772072,
|
| 405 |
+
-0.30416738986968994,
|
| 406 |
+
0.7400000095367432
|
| 407 |
+
],
|
| 408 |
+
"q01": [
|
| 409 |
+
1.1298135677861865e-06,
|
| 410 |
+
-2.8690914095932385e-05,
|
| 411 |
+
-2.510851429633476e-06,
|
| 412 |
+
-5.510928640433116e-05,
|
| 413 |
+
-2.107983063979191e-06,
|
| 414 |
+
-3.907491034624399e-05,
|
| 415 |
+
-6.786238512290763e-07,
|
| 416 |
+
-0.4734627178311348,
|
| 417 |
+
-0.23477528855204582,
|
| 418 |
+
-0.5642639362812042,
|
| 419 |
+
-1.4859102020636782e-06,
|
| 420 |
+
-5.656483683225133e-08,
|
| 421 |
+
-4.254143749449213e-07,
|
| 422 |
+
-1.0846991962409903e-07,
|
| 423 |
+
-0.1681899881362915,
|
| 424 |
+
0.15905899047851563,
|
| 425 |
+
-0.27387906223535535,
|
| 426 |
+
0.012387464968487623,
|
| 427 |
+
-0.2487707431614399,
|
| 428 |
+
-0.3086726626753807,
|
| 429 |
+
-0.20600704431533814,
|
| 430 |
+
-0.1558936455845833,
|
| 431 |
+
-0.22803550645709036,
|
| 432 |
+
-0.5040199017524719,
|
| 433 |
+
-0.014566264236345886,
|
| 434 |
+
-0.1838426996767521,
|
| 435 |
+
-0.5003699988126755,
|
| 436 |
+
-0.15275642275810242,
|
| 437 |
+
-0.07760205484926701,
|
| 438 |
+
0.0615559097379446,
|
| 439 |
+
-0.1145944294333458,
|
| 440 |
+
0.7400000095367432
|
| 441 |
+
],
|
| 442 |
+
"q99": [
|
| 443 |
+
3.103286051100445e-06,
|
| 444 |
+
-1.557816533477306e-06,
|
| 445 |
+
-2.3083463048578747e-07,
|
| 446 |
+
4.52247674274986e-06,
|
| 447 |
+
7.388170627109503e-07,
|
| 448 |
+
1.0908858203038109e-05,
|
| 449 |
+
1.6644753543459957e-06,
|
| 450 |
+
2.0894420146078135e-07,
|
| 451 |
+
0.058002640753984945,
|
| 452 |
+
1.376792984331146e-07,
|
| 453 |
+
0.42478282511234283,
|
| 454 |
+
1.1208237993717194,
|
| 455 |
+
0.6207123923301697,
|
| 456 |
+
1.0326318240165715,
|
| 457 |
+
0.17270984217524538,
|
| 458 |
+
0.19035052344202996,
|
| 459 |
+
0.38950503557920496,
|
| 460 |
+
0.584475804567337,
|
| 461 |
+
0.013913449570536614,
|
| 462 |
+
0.18584083914756777,
|
| 463 |
+
0.23014518827199942,
|
| 464 |
+
0.19788369297981334,
|
| 465 |
+
-0.16056184038519858,
|
| 466 |
+
0.24191985771059998,
|
| 467 |
+
0.5504173463582996,
|
| 468 |
+
0.3378682091832161,
|
| 469 |
+
0.12751864939928056,
|
| 470 |
+
0.6823570990562439,
|
| 471 |
+
0.08747987322509322,
|
| 472 |
+
0.21035773798823357,
|
| 473 |
+
0.10962559245526798,
|
| 474 |
+
0.7400000095367432
|
| 475 |
+
]
|
| 476 |
+
},
|
| 477 |
+
"num_transitions": 20704,
|
| 478 |
+
"num_trajectories": 99
|
| 479 |
+
}
|
| 480 |
+
}
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5efbb6c61d2db01384c4db9d06b017fe4f79e06212c42e2c32a63e3e4bd0cdd
|
| 3 |
+
size 8604557774
|
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/summary.jsonl
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 10000}
|
| 2 |
+
{"steps": 20000}
|
| 3 |
+
{"steps": 30000}
|
| 4 |
+
{"steps": 40000}
|