Safetensors
English
jie530 commited on
Commit
bd705c5
·
verified ·
1 Parent(s): ff8f272

Upload folder using huggingface_hub

Browse files
Files changed (27) hide show
  1. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_10000_pytorch_model.pt +3 -0
  2. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_20000_pytorch_model.pt +3 -0
  3. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_30000_pytorch_model.pt +3 -0
  4. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_40000_pytorch_model.pt +3 -0
  5. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.json +151 -0
  6. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.yaml +130 -0
  7. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/dataset_statistics.json +480 -0
  8. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/final_model/pytorch_model.pt +3 -0
  9. intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/summary.jsonl +4 -0
  10. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_10000_pytorch_model.pt +3 -0
  11. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_20000_pytorch_model.pt +3 -0
  12. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_30000_pytorch_model.pt +3 -0
  13. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_40000_pytorch_model.pt +3 -0
  14. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.json +151 -0
  15. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.yaml +130 -0
  16. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/dataset_statistics.json +480 -0
  17. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/final_model/pytorch_model.pt +3 -0
  18. intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/summary.jsonl +4 -0
  19. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_10000_pytorch_model.pt +3 -0
  20. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_20000_pytorch_model.pt +3 -0
  21. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_30000_pytorch_model.pt +3 -0
  22. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_40000_pytorch_model.pt +3 -0
  23. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.json +151 -0
  24. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.yaml +130 -0
  25. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/dataset_statistics.json +480 -0
  26. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/final_model/pytorch_model.pt +3 -0
  27. intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/summary.jsonl +4 -0
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb192d58e53b54f0b010b6d43849558fb1a823b0a8c3268528204afbd6107a7
3
+ size 8604575530
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b7acfbf138a67f3e10c7bd176b430a6f2c5cb15d078578caca4790cff361b3a
3
+ size 8604575530
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d844e8539befdcf39f8796628db021b11e47af9087ce5a68a1e117b26951bf8
3
+ size 8604575530
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09fb073f88283e564189108f6f6c7200cc27a1438b6165537846fb14fd62154b
3
+ size 8604575530
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "G1WholebodyBendPick",
3
+ "run_root_dir": "runs/InternVLA/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jliu530-soochow-university",
10
+ "wandb_project": "psi",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "framework_py": "InternVLA-M1",
14
+ "qwenvl": {
15
+ "base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "layer_qformer": {
23
+ "qformer_end_layer": 37,
24
+ "qformer_start_layer": 36,
25
+ "num_query_tokens": 64,
26
+ "input_dim": 2048,
27
+ "ouptput_dim": 768,
28
+ "grad_scale": 0.5
29
+ },
30
+ "action_model": {
31
+ "action_model_type": "DiT-B",
32
+ "action_hidden_dim": 768,
33
+ "action_dim": 36,
34
+ "use_ema": false,
35
+ "future_action_window_size": 15,
36
+ "past_action_window_size": 0,
37
+ "repeated_diffusion_steps": 8
38
+ },
39
+ "fm_head_config": {
40
+ "input_embedding_dim": 1536,
41
+ "hidden_size": 1024,
42
+ "add_pos_embed": true,
43
+ "max_seq_len": 1024,
44
+ "action_dim": 36,
45
+ "future_action_window_size": 15,
46
+ "action_horizon": 16,
47
+ "past_action_window_size": 0,
48
+ "noise_beta_alpha": 1.5,
49
+ "noise_beta_beta": 1.0,
50
+ "noise_s": 0.999,
51
+ "num_timestep_buckets": 1000,
52
+ "num_inference_timesteps": 4,
53
+ "num_target_vision_tokens": 32,
54
+ "diffusion_model_cfg": {
55
+ "attention_head_dim": 48,
56
+ "cross_attention_dim": 2048,
57
+ "dropout": 0.2,
58
+ "final_dropout": true,
59
+ "interleave_self_attention": true,
60
+ "norm_type": "ada_norm",
61
+ "num_attention_heads": 32,
62
+ "num_layers": 16,
63
+ "output_dim": 1024,
64
+ "positional_embeddings": null
65
+ }
66
+ }
67
+ },
68
+ "datasets": {
69
+ "vlm_data": {
70
+ "dataset_py": "vlm_datasets",
71
+ "dataformat": "llava_json",
72
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
73
+ "eval_dataset": "aokvqa_cauldron_llava_format",
74
+ "data_flatten": false,
75
+ "base_interval": 2,
76
+ "max_pixels": 50176,
77
+ "min_pixels": 784,
78
+ "model_max_length": 2048,
79
+ "model_type": "qwen2.5vl",
80
+ "per_device_batch_size": 4
81
+ },
82
+ "vla_data": {
83
+ "dataset_py": "lerobot_datasets",
84
+ "data_root_dir": "/hfm/jliu/simple/G1WholebodyBendPick-v0-psi0",
85
+ "data_mix": "humanoid_",
86
+ "action_type": "abs_joints",
87
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
88
+ "CoT_answer": "bbox",
89
+ "default_image_resolution": [
90
+ 3,
91
+ 224,
92
+ 224
93
+ ],
94
+ "per_device_batch_size": 64,
95
+ "preload_all": true,
96
+ "load_all_data_for_training": true,
97
+ "obs": [
98
+ "image_0"
99
+ ],
100
+ "image_size": [
101
+ 224,
102
+ 224
103
+ ]
104
+ }
105
+ },
106
+ "trainer": {
107
+ "epochs": 100,
108
+ "max_train_steps": 40000,
109
+ "num_warmup_steps": 0,
110
+ "save_interval": 10000,
111
+ "eval_interval": 100,
112
+ "learning_rate": {
113
+ "base": 5e-05,
114
+ "qwen_vl_interface": 1e-05,
115
+ "action_model": 0.0001
116
+ },
117
+ "lr_scheduler_type": "cosine_with_min_lr",
118
+ "scheduler_specific_kwargs": {
119
+ "min_lr": 5e-07
120
+ },
121
+ "freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
122
+ "loss_scale": {
123
+ "vla": 1.0,
124
+ "vlm": 0.1
125
+ },
126
+ "pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
127
+ "skip_reload_modules": "action_model",
128
+ "repeated_diffusion_steps": 4,
129
+ "max_grad_norm": 1.0,
130
+ "warmup_ratio": 0.1,
131
+ "weight_decay": 0.0,
132
+ "logging_frequency": 10,
133
+ "gradient_clipping": 1.0,
134
+ "gradient_accumulation_steps": 1,
135
+ "optimizer": {
136
+ "name": "AdamW",
137
+ "betas": [
138
+ 0.9,
139
+ 0.95
140
+ ],
141
+ "eps": 1e-08,
142
+ "weight_decay": 1e-08
143
+ },
144
+ "is_resume": false,
145
+ "resume_epoch": null,
146
+ "resume_step": null,
147
+ "enable_gradient_checkpointing": true,
148
+ "enable_mixed_precision_training": true
149
+ },
150
+ "output_dir": "runs/InternVLA/Checkpoints/G1WholebodyBendPick/20260404_062517"
151
+ }
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/config.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id: G1WholebodyBendPick
2
+ run_root_dir: runs/InternVLA/Checkpoints
3
+ seed: 42
4
+ trackers:
5
+ - jsonl
6
+ - wandb
7
+ wandb_entity: jliu530-soochow-university
8
+ wandb_project: psi
9
+ is_debug: false
10
+ framework:
11
+ framework_py: InternVLA-M1
12
+ qwenvl:
13
+ base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
14
+ attn_implementation: flash_attention_2
15
+ vl_hidden_dim: 2048
16
+ dino:
17
+ dino_backbone: dinov2_vits14
18
+ layer_qformer:
19
+ qformer_end_layer: 37
20
+ qformer_start_layer: 36
21
+ num_query_tokens: 64
22
+ input_dim: 2048
23
+ ouptput_dim: 768
24
+ grad_scale: 0.5
25
+ action_model:
26
+ action_model_type: DiT-B
27
+ action_hidden_dim: 768
28
+ action_dim: 36
29
+ use_ema: false
30
+ future_action_window_size: 15
31
+ past_action_window_size: 0
32
+ repeated_diffusion_steps: 8
33
+ fm_head_config:
34
+ input_embedding_dim: 1536
35
+ hidden_size: 1024
36
+ add_pos_embed: true
37
+ max_seq_len: 1024
38
+ action_dim: 36
39
+ future_action_window_size: 15
40
+ action_horizon: 16
41
+ past_action_window_size: 0
42
+ noise_beta_alpha: 1.5
43
+ noise_beta_beta: 1.0
44
+ noise_s: 0.999
45
+ num_timestep_buckets: 1000
46
+ num_inference_timesteps: 4
47
+ num_target_vision_tokens: 32
48
+ diffusion_model_cfg:
49
+ attention_head_dim: 48
50
+ cross_attention_dim: 2048
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ interleave_self_attention: true
54
+ norm_type: ada_norm
55
+ num_attention_heads: 32
56
+ num_layers: 16
57
+ output_dim: 1024
58
+ positional_embeddings: null
59
+ datasets:
60
+ vlm_data:
61
+ dataset_py: vlm_datasets
62
+ dataformat: llava_json
63
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
64
+ eval_dataset: aokvqa_cauldron_llava_format
65
+ data_flatten: false
66
+ base_interval: 2
67
+ max_pixels: 50176
68
+ min_pixels: 784
69
+ model_max_length: 2048
70
+ model_type: qwen2.5vl
71
+ per_device_batch_size: 4
72
+ vla_data:
73
+ dataset_py: lerobot_datasets
74
+ data_root_dir: /hfm/jliu/simple/G1WholebodyBendPick-v0-psi0
75
+ data_mix: humanoid_
76
+ action_type: abs_joints
77
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
78
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
79
+ CoT_answer: bbox
80
+ default_image_resolution:
81
+ - 3
82
+ - 224
83
+ - 224
84
+ per_device_batch_size: 64
85
+ preload_all: true
86
+ load_all_data_for_training: true
87
+ obs:
88
+ - image_0
89
+ image_size:
90
+ - 224
91
+ - 224
92
+ trainer:
93
+ epochs: 100
94
+ max_train_steps: 40000
95
+ num_warmup_steps: 0
96
+ save_interval: 10000
97
+ eval_interval: 100
98
+ learning_rate:
99
+ base: 5.0e-05
100
+ qwen_vl_interface: 1.0e-05
101
+ action_model: 0.0001
102
+ lr_scheduler_type: cosine_with_min_lr
103
+ scheduler_specific_kwargs:
104
+ min_lr: 5.0e-07
105
+ freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
106
+ loss_scale:
107
+ vla: 1.0
108
+ vlm: 0.1
109
+ pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
110
+ skip_reload_modules: action_model
111
+ repeated_diffusion_steps: 4
112
+ max_grad_norm: 1.0
113
+ warmup_ratio: 0.1
114
+ weight_decay: 0.0
115
+ logging_frequency: 10
116
+ gradient_clipping: 1.0
117
+ gradient_accumulation_steps: 1
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ is_resume: false
126
+ resume_epoch: null
127
+ resume_step: null
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ output_dir: runs/InternVLA/Checkpoints/G1WholebodyBendPick/20260404_062517
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/dataset_statistics.json ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ -0.0008055749931372702,
6
+ 0.003662512404844165,
7
+ 0.031012117862701416,
8
+ -0.012596862390637398,
9
+ -0.027946343645453453,
10
+ -0.023698121309280396,
11
+ -0.03487454727292061,
12
+ 0.1510307490825653,
13
+ -0.10410650074481964,
14
+ -0.16438010334968567,
15
+ 0.160273939371109,
16
+ 0.38479897379875183,
17
+ 0.22493480145931244,
18
+ 0.29762107133865356,
19
+ -0.020675089210271835,
20
+ -0.011319637298583984,
21
+ -0.02104736864566803,
22
+ -0.0047059389762580395,
23
+ -0.10232270509004593,
24
+ -0.08670033514499664,
25
+ -0.052605174481868744,
26
+ -0.1269960105419159,
27
+ -0.23249554634094238,
28
+ -0.022764088585972786,
29
+ 0.04807743430137634,
30
+ 0.058973122388124466,
31
+ -0.11149747669696808,
32
+ 0.25905102491378784,
33
+ 0.08001509308815002,
34
+ 0.16946518421173096,
35
+ 0.008698188699781895,
36
+ 0.4670487940311432,
37
+ 0.0,
38
+ 0.0,
39
+ 0.0,
40
+ 0.0
41
+ ],
42
+ "std": [
43
+ 0.026223942637443542,
44
+ 0.026004673913121223,
45
+ 0.05157838761806488,
46
+ 0.028955502435564995,
47
+ 0.04606349393725395,
48
+ 0.04066930338740349,
49
+ 0.05465922877192497,
50
+ 0.16792035102844238,
51
+ 0.14840462803840637,
52
+ 0.20773763954639435,
53
+ 0.201939195394516,
54
+ 0.43836456537246704,
55
+ 0.2673484683036804,
56
+ 0.3443882465362549,
57
+ 0.028416506946086884,
58
+ 0.021467505022883415,
59
+ 0.032287437468767166,
60
+ 0.03757817670702934,
61
+ 0.11277986317873001,
62
+ 0.09554938971996307,
63
+ 0.06037674844264984,
64
+ 0.15188516676425934,
65
+ 0.2564069926738739,
66
+ 0.05658402293920517,
67
+ 0.06496992707252502,
68
+ 0.07428737729787827,
69
+ 0.12488652765750885,
70
+ 0.28678208589553833,
71
+ 0.13087092339992523,
72
+ 0.15007734298706055,
73
+ 0.05011340230703354,
74
+ 0.06933368742465973,
75
+ 0.0,
76
+ 0.0,
77
+ 0.0,
78
+ 0.0
79
+ ],
80
+ "max": [
81
+ 0.12923675775527954,
82
+ 0.10485697537660599,
83
+ 0.22624923288822174,
84
+ 0.022101346403360367,
85
+ 0.03615603595972061,
86
+ 0.009702185168862343,
87
+ 0.008694176562130451,
88
+ 0.38237157464027405,
89
+ 0.0,
90
+ 0.0,
91
+ 0.6359286308288574,
92
+ 1.1968196630477905,
93
+ 0.7969403266906738,
94
+ 0.9798202514648438,
95
+ 0.02855294942855835,
96
+ 0.09991803765296936,
97
+ 0.05098697543144226,
98
+ 0.1499277651309967,
99
+ 3.8650854548905045e-05,
100
+ 0.0012129372917115688,
101
+ 0.006128575652837753,
102
+ 0.01944165676832199,
103
+ 0.00040899173473007977,
104
+ 0.19534528255462646,
105
+ 0.298308402299881,
106
+ 0.3513643741607666,
107
+ 0.0012087320210412145,
108
+ 0.8875377178192139,
109
+ 0.3364854156970978,
110
+ 0.4499310553073883,
111
+ 0.18629509210586548,
112
+ 0.75,
113
+ 0.0,
114
+ 0.0,
115
+ 0.0,
116
+ 0.0
117
+ ],
118
+ "min": [
119
+ -0.09775445610284805,
120
+ -0.0823368951678276,
121
+ -0.027741333469748497,
122
+ -0.1810884326696396,
123
+ -0.21405744552612305,
124
+ -0.20122334361076355,
125
+ -0.295552521944046,
126
+ -0.015388990752398968,
127
+ -0.49568232893943787,
128
+ -0.6491441130638123,
129
+ 0.0,
130
+ 0.0,
131
+ 0.0,
132
+ 0.0,
133
+ -0.11302866041660309,
134
+ -0.06130801886320114,
135
+ -0.2347739338874817,
136
+ -0.149064838886261,
137
+ -0.34618079662323,
138
+ -0.296110063791275,
139
+ -0.18200430274009705,
140
+ -0.5555615425109863,
141
+ -0.7934529185295105,
142
+ -0.3188854157924652,
143
+ -0.09826192259788513,
144
+ -3.864927566610277e-05,
145
+ -0.406229704618454,
146
+ 0.0,
147
+ -0.0435166172683239,
148
+ -0.014203691855072975,
149
+ -0.05216570198535919,
150
+ 0.44999998807907104,
151
+ 0.0,
152
+ 0.0,
153
+ 0.0,
154
+ 0.0
155
+ ],
156
+ "q01": [
157
+ -0.044115488231182096,
158
+ -0.05932579189538956,
159
+ -0.0008548528398387134,
160
+ -0.14063991606235504,
161
+ -0.18160852789878845,
162
+ -0.13885661959648132,
163
+ -0.2704181373119354,
164
+ -0.015381569974124432,
165
+ -0.49547951817512514,
166
+ -0.6489888429641724,
167
+ 0.0,
168
+ 0.0,
169
+ 0.0,
170
+ 0.0,
171
+ -0.09533142298460007,
172
+ -0.057845381833612916,
173
+ -0.11222188174724579,
174
+ -0.12452998012304306,
175
+ -0.33181595951318743,
176
+ -0.26951175928115845,
177
+ -0.16966578140854835,
178
+ -0.504463392496109,
179
+ -0.7292503118515015,
180
+ -0.21955281451344488,
181
+ -0.03430207073688507,
182
+ -3.847765765385702e-05,
183
+ -0.3624899685382843,
184
+ 0.0,
185
+ -0.04341023042798042,
186
+ -0.01279706321656704,
187
+ -0.03870870623737574,
188
+ 0.44999998807907104,
189
+ 0.0,
190
+ 0.0,
191
+ 0.0,
192
+ 0.0
193
+ ],
194
+ "q99": [
195
+ 0.08541888743638992,
196
+ 0.08673391491174698,
197
+ 0.20383067429065704,
198
+ 0.010909304022789001,
199
+ 0.005769035266712276,
200
+ 0.007609221618622541,
201
+ 0.0008619268686742959,
202
+ 0.382364958524704,
203
+ 0.0,
204
+ 0.0,
205
+ 0.6356749534606934,
206
+ 1.1965277194976807,
207
+ 0.7967880129814148,
208
+ 0.9795168793201446,
209
+ 0.010912260971963406,
210
+ 0.04415091205388274,
211
+ 0.02633006187155817,
212
+ 0.06915374681353538,
213
+ 3.84151644539088e-05,
214
+ 0.001203537336550653,
215
+ 3.166658199916128e-05,
216
+ 0.002674137009307742,
217
+ 0.000406751612899825,
218
+ 0.12524892970919546,
219
+ 0.21630814880132343,
220
+ 0.2785507142543793,
221
+ 0.0011997524416074157,
222
+ 0.847134844660759,
223
+ 0.3044067323207855,
224
+ 0.43886008858680725,
225
+ 0.13876871764659882,
226
+ 0.75,
227
+ 0.0,
228
+ 0.0,
229
+ 0.0,
230
+ 0.0
231
+ ],
232
+ "mask": [
233
+ true,
234
+ true,
235
+ true,
236
+ true,
237
+ true,
238
+ true,
239
+ true,
240
+ true,
241
+ true,
242
+ true,
243
+ true,
244
+ true,
245
+ true,
246
+ true,
247
+ true,
248
+ true,
249
+ true,
250
+ true,
251
+ true,
252
+ true,
253
+ true,
254
+ true,
255
+ true,
256
+ true,
257
+ true,
258
+ true,
259
+ true,
260
+ true,
261
+ true,
262
+ true,
263
+ true,
264
+ true,
265
+ true,
266
+ true,
267
+ true,
268
+ true
269
+ ]
270
+ },
271
+ "state": {
272
+ "mean": [
273
+ -0.0054089887998998165,
274
+ 0.005827104672789574,
275
+ 0.031063664704561234,
276
+ -0.013071244582533836,
277
+ -0.027740946039557457,
278
+ -0.024281593039631844,
279
+ -0.03443169221282005,
280
+ 0.14551377296447754,
281
+ -0.04790256544947624,
282
+ -0.16233190894126892,
283
+ 0.12968285381793976,
284
+ 0.37486323714256287,
285
+ 0.1825636923313141,
286
+ 0.28982678055763245,
287
+ -0.01859004981815815,
288
+ -0.011797229759395123,
289
+ -0.02136491984128952,
290
+ -0.0015487042255699635,
291
+ -0.10206520557403564,
292
+ -0.08541297912597656,
293
+ -0.05251696705818176,
294
+ -0.12422259151935577,
295
+ -0.23228920996189117,
296
+ -0.023111552000045776,
297
+ 0.05130844563245773,
298
+ 0.05851978436112404,
299
+ -0.10995743423700333,
300
+ 0.2582551836967468,
301
+ 0.0,
302
+ -0.1490357518196106,
303
+ 0.0,
304
+ 0.4670488238334656
305
+ ],
306
+ "std": [
307
+ 0.026754410937428474,
308
+ 0.02570926956832409,
309
+ 0.05101049691438675,
310
+ 0.02832541987299919,
311
+ 0.04559871181845665,
312
+ 0.04026580974459648,
313
+ 0.05430656298995018,
314
+ 0.17061059176921844,
315
+ 0.056338630616664886,
316
+ 0.20902083814144135,
317
+ 0.14012272655963898,
318
+ 0.4300926923751831,
319
+ 0.19461630284786224,
320
+ 0.3383548855781555,
321
+ 0.02924812026321888,
322
+ 0.021750465035438538,
323
+ 0.032564036548137665,
324
+ 0.03753108158707619,
325
+ 0.11267656087875366,
326
+ 0.09539038687944412,
327
+ 0.0603468157351017,
328
+ 0.15201117098331451,
329
+ 0.2566412389278412,
330
+ 0.05685749650001526,
331
+ 0.06506538391113281,
332
+ 0.07393182814121246,
333
+ 0.1244540736079216,
334
+ 0.28631067276000977,
335
+ 0.0,
336
+ 0.012014704756438696,
337
+ 0.0,
338
+ 0.06933368742465973
339
+ ],
340
+ "max": [
341
+ 0.12999998033046722,
342
+ 0.10700006783008575,
343
+ 0.2280000001192093,
344
+ 0.0,
345
+ 0.0,
346
+ 0.0,
347
+ 0.0,
348
+ 0.5130000114440918,
349
+ 0.1720000058412552,
350
+ -0.0010000000474974513,
351
+ 0.4450001120567322,
352
+ 1.375,
353
+ 0.5569999814033508,
354
+ 1.1579999923706055,
355
+ 0.031000027433037758,
356
+ 0.09900011122226715,
357
+ 0.05100004002451897,
358
+ 0.15700000524520874,
359
+ 0.0,
360
+ 0.003000000026077032,
361
+ 0.006000017747282982,
362
+ 0.020999999716877937,
363
+ 0.0010000000474974513,
364
+ 0.20200000703334808,
365
+ 0.31200000643730164,
366
+ 0.35199999809265137,
367
+ 0.003000000026077032,
368
+ 0.8899999856948853,
369
+ 0.0,
370
+ 0.0,
371
+ 0.0,
372
+ 0.75
373
+ ],
374
+ "min": [
375
+ -0.09799999743700027,
376
+ -0.08199996501207352,
377
+ -2.3887020139667925e-10,
378
+ -0.18299999833106995,
379
+ -0.22200000286102295,
380
+ -0.21500006318092346,
381
+ -0.29699963331222534,
382
+ -0.01600000075995922,
383
+ -0.20600000023841858,
384
+ -0.8199999928474426,
385
+ 0.004000000189989805,
386
+ 0.0010000000474974513,
387
+ 0.0010000000474974513,
388
+ 0.0010000000474974513,
389
+ -0.1139998808503151,
390
+ -0.06499999761581421,
391
+ -0.2379997819662094,
392
+ -0.14899970591068268,
393
+ -0.34599998593330383,
394
+ -0.29499998688697815,
395
+ -0.18199999630451202,
396
+ -0.5590000152587891,
397
+ -0.800000011920929,
398
+ -0.32399997115135193,
399
+ -0.09600000083446503,
400
+ 0.0,
401
+ -0.4069998264312744,
402
+ 0.0,
403
+ 0.0,
404
+ -0.15000000596046448,
405
+ 0.0,
406
+ 0.44999998807907104
407
+ ],
408
+ "q01": [
409
+ -0.04413001623004675,
410
+ -0.05999999865889549,
411
+ 0.0,
412
+ -0.14100000262260437,
413
+ -0.1809999942779541,
414
+ -0.13899989426136017,
415
+ -0.2699997127056122,
416
+ -0.014999999664723873,
417
+ -0.15199999511241913,
418
+ -0.7352600109577179,
419
+ 0.004000000189989805,
420
+ 0.0010000000474974513,
421
+ 0.004000000189989805,
422
+ 0.0010000000474974513,
423
+ -0.09499987408518791,
424
+ -0.05999991346150637,
425
+ -0.11399985201656819,
426
+ -0.12030089475214481,
427
+ -0.3310000002384186,
428
+ -0.26899999380111694,
429
+ -0.17000000178813934,
430
+ -0.49499997884035113,
431
+ -0.7279999852180481,
432
+ -0.2192599435150623,
433
+ -0.031000016499310733,
434
+ 0.0,
435
+ -0.36000001430511475,
436
+ 0.0,
437
+ 0.0,
438
+ -0.15000000596046448,
439
+ 0.0,
440
+ 0.44999998807907104
441
+ ],
442
+ "q99": [
443
+ 0.08499999344348907,
444
+ 0.0870000347495079,
445
+ 0.20399999618530273,
446
+ 0.0,
447
+ 0.0,
448
+ 0.0,
449
+ 0.0,
450
+ 0.39899998903274536,
451
+ 0.06999966129660606,
452
+ -0.0010000000474974513,
453
+ 0.41100001335144043,
454
+ 1.2691300439834587,
455
+ 0.49799999594688416,
456
+ 1.0709999799728394,
457
+ 0.010999999940395355,
458
+ 0.0430000014603138,
459
+ 0.02513002794235865,
460
+ 0.07100000232458115,
461
+ 0.0,
462
+ 0.0020000000949949026,
463
+ 0.0,
464
+ 0.00800000037997961,
465
+ 0.0010000000474974513,
466
+ 0.12513000026345172,
467
+ 0.22699999809265137,
468
+ 0.27900001406669617,
469
+ 0.0020000000949949026,
470
+ 0.8460000157356262,
471
+ 0.0,
472
+ -0.15000000596046448,
473
+ 0.0,
474
+ 0.75
475
+ ]
476
+ },
477
+ "num_transitions": 15488,
478
+ "num_trajectories": 100
479
+ }
480
+ }
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d4b5d78ab54906c896a0bdec9d3f4a1acee68967f7de3e97d4248a621f895b
3
+ size 8604557774
intervla-m1/simple/G1WholebodyBendPickMP-v0/20260404_062517/summary.jsonl ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"steps": 10000}
2
+ {"steps": 20000}
3
+ {"steps": 30000}
4
+ {"steps": 40000}
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f7076e5a04ec649ad9a69338a77ec461423adad28c3df8aa9edbc43509609ac
3
+ size 8604575530
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:000f727fc7ee8f52446c0e213b22b610470898803f973c4a51efb4f5b4a1960e
3
+ size 8604575530
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d468e3720d06c4d12dda0c639d8d3c091a10b70f627fc884869b682fe8dac4
3
+ size 8604575530
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0861c89f79bae652d478bc7d141061ea25d2afc78a28dd221e122f8166cab1
3
+ size 8604575530
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "G1WholebodyTabletopGrasp",
3
+ "run_root_dir": "runs/InternVLA/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jliu530-soochow-university",
10
+ "wandb_project": "psi",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "framework_py": "InternVLA-M1",
14
+ "qwenvl": {
15
+ "base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "layer_qformer": {
23
+ "qformer_end_layer": 37,
24
+ "qformer_start_layer": 36,
25
+ "num_query_tokens": 64,
26
+ "input_dim": 2048,
27
+ "ouptput_dim": 768,
28
+ "grad_scale": 0.5
29
+ },
30
+ "action_model": {
31
+ "action_model_type": "DiT-B",
32
+ "action_hidden_dim": 768,
33
+ "action_dim": 36,
34
+ "use_ema": false,
35
+ "future_action_window_size": 15,
36
+ "past_action_window_size": 0,
37
+ "repeated_diffusion_steps": 8
38
+ },
39
+ "fm_head_config": {
40
+ "input_embedding_dim": 1536,
41
+ "hidden_size": 1024,
42
+ "add_pos_embed": true,
43
+ "max_seq_len": 1024,
44
+ "action_dim": 36,
45
+ "future_action_window_size": 15,
46
+ "action_horizon": 16,
47
+ "past_action_window_size": 0,
48
+ "noise_beta_alpha": 1.5,
49
+ "noise_beta_beta": 1.0,
50
+ "noise_s": 0.999,
51
+ "num_timestep_buckets": 1000,
52
+ "num_inference_timesteps": 4,
53
+ "num_target_vision_tokens": 32,
54
+ "diffusion_model_cfg": {
55
+ "attention_head_dim": 48,
56
+ "cross_attention_dim": 2048,
57
+ "dropout": 0.2,
58
+ "final_dropout": true,
59
+ "interleave_self_attention": true,
60
+ "norm_type": "ada_norm",
61
+ "num_attention_heads": 32,
62
+ "num_layers": 16,
63
+ "output_dim": 1024,
64
+ "positional_embeddings": null
65
+ }
66
+ }
67
+ },
68
+ "datasets": {
69
+ "vlm_data": {
70
+ "dataset_py": "vlm_datasets",
71
+ "dataformat": "llava_json",
72
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
73
+ "eval_dataset": "aokvqa_cauldron_llava_format",
74
+ "data_flatten": false,
75
+ "base_interval": 2,
76
+ "max_pixels": 50176,
77
+ "min_pixels": 784,
78
+ "model_max_length": 2048,
79
+ "model_type": "qwen2.5vl",
80
+ "per_device_batch_size": 4
81
+ },
82
+ "vla_data": {
83
+ "dataset_py": "lerobot_datasets",
84
+ "data_root_dir": "/hfm/jliu/simple/G1WholebodyTabletopGrasp-v0",
85
+ "data_mix": "humanoid_",
86
+ "action_type": "abs_joints",
87
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
88
+ "CoT_answer": "bbox",
89
+ "default_image_resolution": [
90
+ 3,
91
+ 224,
92
+ 224
93
+ ],
94
+ "per_device_batch_size": 64,
95
+ "preload_all": true,
96
+ "load_all_data_for_training": true,
97
+ "obs": [
98
+ "image_0"
99
+ ],
100
+ "image_size": [
101
+ 224,
102
+ 224
103
+ ]
104
+ }
105
+ },
106
+ "trainer": {
107
+ "epochs": 100,
108
+ "max_train_steps": 40000,
109
+ "num_warmup_steps": 0,
110
+ "save_interval": 10000,
111
+ "eval_interval": 100,
112
+ "learning_rate": {
113
+ "base": 5e-05,
114
+ "qwen_vl_interface": 1e-05,
115
+ "action_model": 0.0001
116
+ },
117
+ "lr_scheduler_type": "cosine_with_min_lr",
118
+ "scheduler_specific_kwargs": {
119
+ "min_lr": 5e-07
120
+ },
121
+ "freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
122
+ "loss_scale": {
123
+ "vla": 1.0,
124
+ "vlm": 0.1
125
+ },
126
+ "pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
127
+ "skip_reload_modules": "action_model",
128
+ "repeated_diffusion_steps": 4,
129
+ "max_grad_norm": 1.0,
130
+ "warmup_ratio": 0.1,
131
+ "weight_decay": 0.0,
132
+ "logging_frequency": 10,
133
+ "gradient_clipping": 1.0,
134
+ "gradient_accumulation_steps": 1,
135
+ "optimizer": {
136
+ "name": "AdamW",
137
+ "betas": [
138
+ 0.9,
139
+ 0.95
140
+ ],
141
+ "eps": 1e-08,
142
+ "weight_decay": 1e-08
143
+ },
144
+ "is_resume": false,
145
+ "resume_epoch": null,
146
+ "resume_step": null,
147
+ "enable_gradient_checkpointing": true,
148
+ "enable_mixed_precision_training": true
149
+ },
150
+ "output_dir": "runs/InternVLA/Checkpoints/G1WholebodyTabletopGrasp/20260404_061207"
151
+ }
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/config.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id: G1WholebodyTabletopGrasp
2
+ run_root_dir: runs/InternVLA/Checkpoints
3
+ seed: 42
4
+ trackers:
5
+ - jsonl
6
+ - wandb
7
+ wandb_entity: jliu530-soochow-university
8
+ wandb_project: psi
9
+ is_debug: false
10
+ framework:
11
+ framework_py: InternVLA-M1
12
+ qwenvl:
13
+ base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
14
+ attn_implementation: flash_attention_2
15
+ vl_hidden_dim: 2048
16
+ dino:
17
+ dino_backbone: dinov2_vits14
18
+ layer_qformer:
19
+ qformer_end_layer: 37
20
+ qformer_start_layer: 36
21
+ num_query_tokens: 64
22
+ input_dim: 2048
23
+ ouptput_dim: 768
24
+ grad_scale: 0.5
25
+ action_model:
26
+ action_model_type: DiT-B
27
+ action_hidden_dim: 768
28
+ action_dim: 36
29
+ use_ema: false
30
+ future_action_window_size: 15
31
+ past_action_window_size: 0
32
+ repeated_diffusion_steps: 8
33
+ fm_head_config:
34
+ input_embedding_dim: 1536
35
+ hidden_size: 1024
36
+ add_pos_embed: true
37
+ max_seq_len: 1024
38
+ action_dim: 36
39
+ future_action_window_size: 15
40
+ action_horizon: 16
41
+ past_action_window_size: 0
42
+ noise_beta_alpha: 1.5
43
+ noise_beta_beta: 1.0
44
+ noise_s: 0.999
45
+ num_timestep_buckets: 1000
46
+ num_inference_timesteps: 4
47
+ num_target_vision_tokens: 32
48
+ diffusion_model_cfg:
49
+ attention_head_dim: 48
50
+ cross_attention_dim: 2048
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ interleave_self_attention: true
54
+ norm_type: ada_norm
55
+ num_attention_heads: 32
56
+ num_layers: 16
57
+ output_dim: 1024
58
+ positional_embeddings: null
59
+ datasets:
60
+ vlm_data:
61
+ dataset_py: vlm_datasets
62
+ dataformat: llava_json
63
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
64
+ eval_dataset: aokvqa_cauldron_llava_format
65
+ data_flatten: false
66
+ base_interval: 2
67
+ max_pixels: 50176
68
+ min_pixels: 784
69
+ model_max_length: 2048
70
+ model_type: qwen2.5vl
71
+ per_device_batch_size: 4
72
+ vla_data:
73
+ dataset_py: lerobot_datasets
74
+ data_root_dir: /hfm/jliu/simple/G1WholebodyTabletopGrasp-v0
75
+ data_mix: humanoid_
76
+ action_type: abs_joints
77
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
78
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
79
+ CoT_answer: bbox
80
+ default_image_resolution:
81
+ - 3
82
+ - 224
83
+ - 224
84
+ per_device_batch_size: 64
85
+ preload_all: true
86
+ load_all_data_for_training: true
87
+ obs:
88
+ - image_0
89
+ image_size:
90
+ - 224
91
+ - 224
92
+ trainer:
93
+ epochs: 100
94
+ max_train_steps: 40000
95
+ num_warmup_steps: 0
96
+ save_interval: 10000
97
+ eval_interval: 100
98
+ learning_rate:
99
+ base: 5.0e-05
100
+ qwen_vl_interface: 1.0e-05
101
+ action_model: 0.0001
102
+ lr_scheduler_type: cosine_with_min_lr
103
+ scheduler_specific_kwargs:
104
+ min_lr: 5.0e-07
105
+ freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
106
+ loss_scale:
107
+ vla: 1.0
108
+ vlm: 0.1
109
+ pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
110
+ skip_reload_modules: action_model
111
+ repeated_diffusion_steps: 4
112
+ max_grad_norm: 1.0
113
+ warmup_ratio: 0.1
114
+ weight_decay: 0.0
115
+ logging_frequency: 10
116
+ gradient_clipping: 1.0
117
+ gradient_accumulation_steps: 1
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ is_resume: false
126
+ resume_epoch: null
127
+ resume_step: null
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ output_dir: runs/InternVLA/Checkpoints/G1WholebodyTabletopGrasp/20260404_061207
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/dataset_statistics.json ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ -0.007915518246591091,
6
+ 0.0185376163572073,
7
+ 0.05745560675859451,
8
+ -0.048222675919532776,
9
+ -0.05002971738576889,
10
+ -0.04234893247485161,
11
+ -0.044687654823064804,
12
+ 0.30921489000320435,
13
+ 0.13608524203300476,
14
+ -0.3551200032234192,
15
+ 0.3016810417175293,
16
+ 0.7096384167671204,
17
+ 0.43915659189224243,
18
+ 0.729606568813324,
19
+ 0.0022646363358944654,
20
+ 0.023206504061818123,
21
+ -0.022678690031170845,
22
+ 0.004449035506695509,
23
+ -0.023373732343316078,
24
+ -0.0023263380862772465,
25
+ 0.02248271182179451,
26
+ -0.5447278022766113,
27
+ -0.13919976353645325,
28
+ 0.32043108344078064,
29
+ 0.6696299910545349,
30
+ -0.47840023040771484,
31
+ -0.09356574714183807,
32
+ -0.13595955073833466,
33
+ -0.023428356274962425,
34
+ -0.0008574495441280305,
35
+ -0.0011587876360863447,
36
+ 0.75,
37
+ 0.0,
38
+ 0.0,
39
+ 0.0,
40
+ 0.0
41
+ ],
42
+ "std": [
43
+ 0.06855416297912598,
44
+ 0.04814520105719566,
45
+ 0.0739685669541359,
46
+ 0.08283186703920364,
47
+ 0.07103259861469269,
48
+ 0.06894790381193161,
49
+ 0.07200707495212555,
50
+ 0.22745577991008759,
51
+ 0.0967465490102768,
52
+ 0.2211156189441681,
53
+ 0.19299180805683136,
54
+ 0.4131201207637787,
55
+ 0.27728959918022156,
56
+ 0.47446826100349426,
57
+ 0.003659198060631752,
58
+ 0.049973152577877045,
59
+ 0.048772457987070084,
60
+ 0.00308406469412148,
61
+ 0.050082605332136154,
62
+ 0.008206783793866634,
63
+ 0.04823305457830429,
64
+ 0.3143916130065918,
65
+ 0.10105322301387787,
66
+ 0.24209849536418915,
67
+ 0.42984139919281006,
68
+ 0.29979273676872253,
69
+ 0.1555895209312439,
70
+ 0.1276027262210846,
71
+ 0.011642614379525185,
72
+ 0.02119765430688858,
73
+ 0.010956699028611183,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0,
77
+ 0.0,
78
+ 0.0
79
+ ],
80
+ "max": [
81
+ 0.21513332426548004,
82
+ 0.21692107617855072,
83
+ 0.3652719259262085,
84
+ 0.07139641791582108,
85
+ 0.015001054853200912,
86
+ 0.03918211907148361,
87
+ 0.03575323149561882,
88
+ 0.6107784509658813,
89
+ 0.31583136320114136,
90
+ -0.00040738514508120716,
91
+ 0.6836385726928711,
92
+ 1.4285058975219727,
93
+ 0.8524638414382935,
94
+ 1.7429704666137695,
95
+ 0.00735096400603652,
96
+ 0.25089067220687866,
97
+ 0.04510946571826935,
98
+ 0.017853474244475365,
99
+ 0.046191196888685226,
100
+ 0.0031940839253365993,
101
+ 0.24179035425186157,
102
+ 0.0037467884831130505,
103
+ 0.0002910589682869613,
104
+ 0.7421935796737671,
105
+ 1.6607650518417358,
106
+ -3.8668040360789746e-05,
107
+ 0.3736472725868225,
108
+ 0.30042290687561035,
109
+ -0.014337222091853619,
110
+ 0.02500663883984089,
111
+ 0.013617209158837795,
112
+ 0.75,
113
+ 0.0,
114
+ 0.0,
115
+ 0.0,
116
+ 0.0
117
+ ],
118
+ "min": [
119
+ -0.21088384091854095,
120
+ -0.11738907545804977,
121
+ -0.013445371761918068,
122
+ -0.5775371193885803,
123
+ -0.30140629410743713,
124
+ -0.3430681824684143,
125
+ -0.37358492612838745,
126
+ -0.007524379529058933,
127
+ -0.01704181358218193,
128
+ -0.6932834386825562,
129
+ 0.0026604870799928904,
130
+ 0.000401000608690083,
131
+ 0.0007143893744796515,
132
+ 0.0001968950527952984,
133
+ -0.022009270265698433,
134
+ -0.04607510566711426,
135
+ -0.24804681539535522,
136
+ -0.005683199502527714,
137
+ -0.25139108300209045,
138
+ -0.05519897863268852,
139
+ -0.04459292069077492,
140
+ -1.0918865203857422,
141
+ -0.3572312593460083,
142
+ -0.7850697636604309,
143
+ 0.003401592606678605,
144
+ -1.4517900943756104,
145
+ -0.5649155378341675,
146
+ -0.4592915177345276,
147
+ -0.08972926437854767,
148
+ -0.1079544723033905,
149
+ -0.04311269149184227,
150
+ 0.75,
151
+ 0.0,
152
+ 0.0,
153
+ 0.0,
154
+ 0.0
155
+ ],
156
+ "q01": [
157
+ -0.1592998020350933,
158
+ -0.09867189824581146,
159
+ -0.00921511696651578,
160
+ -0.43521402031183243,
161
+ -0.2678038999438286,
162
+ -0.3131137639284134,
163
+ -0.34229206293821335,
164
+ -0.007516397396102548,
165
+ -0.003259584365878254,
166
+ -0.657462865114212,
167
+ 0.002886058180592954,
168
+ 0.0004246826865710318,
169
+ 0.0023594271624460816,
170
+ 0.0009427882905583829,
171
+ -0.015005301684141159,
172
+ -0.03586177062243223,
173
+ -0.2033357471227646,
174
+ -0.0044474758906289935,
175
+ -0.20831073820590973,
176
+ -0.03924582712352276,
177
+ -0.03469793684780598,
178
+ -1.0538361966609955,
179
+ -0.35215289890766144,
180
+ -0.0011892615584656596,
181
+ 0.0034092417918145657,
182
+ -1.005431056022644,
183
+ -0.49986276030540466,
184
+ -0.4488404765725136,
185
+ -0.06848237849771976,
186
+ -0.0625098580494523,
187
+ -0.031260753981769085,
188
+ 0.75,
189
+ 0.0,
190
+ 0.0,
191
+ 0.0,
192
+ 0.0
193
+ ],
194
+ "q99": [
195
+ 0.1750056967139244,
196
+ 0.17476186528801918,
197
+ 0.2931685149669647,
198
+ 0.028990697581321,
199
+ 0.012583295814692974,
200
+ 0.013281819876283407,
201
+ 0.009987043216824532,
202
+ 0.6069400906562805,
203
+ 0.31562769412994385,
204
+ -0.0004517402339843102,
205
+ 0.6142808347940445,
206
+ 1.2800610959529877,
207
+ 0.848078653216362,
208
+ 1.608409583568573,
209
+ 0.005269425339065492,
210
+ 0.20807136595249176,
211
+ 0.03510108310729265,
212
+ 0.016178835183382034,
213
+ 0.03597608767449856,
214
+ 0.0025836762506514788,
215
+ 0.19978401437401772,
216
+ 0.0036904277512803674,
217
+ 0.00027790151216322556,
218
+ 0.7203168272972107,
219
+ 1.4561529159545898,
220
+ -3.885765272571007e-05,
221
+ 0.22245876863598824,
222
+ 0.11632611602544785,
223
+ -0.015507389791309834,
224
+ 0.02313118800520897,
225
+ 0.011822471395134926,
226
+ 0.75,
227
+ 0.0,
228
+ 0.0,
229
+ 0.0,
230
+ 0.0
231
+ ],
232
+ "mask": [
233
+ true,
234
+ true,
235
+ true,
236
+ true,
237
+ true,
238
+ true,
239
+ true,
240
+ true,
241
+ true,
242
+ true,
243
+ true,
244
+ true,
245
+ true,
246
+ true,
247
+ true,
248
+ true,
249
+ true,
250
+ true,
251
+ true,
252
+ true,
253
+ true,
254
+ true,
255
+ true,
256
+ true,
257
+ true,
258
+ true,
259
+ true,
260
+ true,
261
+ true,
262
+ true,
263
+ true,
264
+ true,
265
+ true,
266
+ true,
267
+ true,
268
+ true
269
+ ]
270
+ },
271
+ "state": {
272
+ "mean": [
273
+ -0.007949735037982464,
274
+ 0.01838493160903454,
275
+ 0.05705238878726959,
276
+ -0.04782641679048538,
277
+ -0.04985744506120682,
278
+ -0.043356288224458694,
279
+ -0.04469400271773338,
280
+ 0.3088795244693756,
281
+ 0.17249879240989685,
282
+ -0.34438005089759827,
283
+ 0.27151158452033997,
284
+ 0.6904981136322021,
285
+ 0.42134150862693787,
286
+ 0.7150870561599731,
287
+ 0.006028186995536089,
288
+ 0.023279791697859764,
289
+ -0.022360648959875107,
290
+ 0.007919888943433762,
291
+ -0.023288242518901825,
292
+ -0.0010538079077377915,
293
+ 0.02263154275715351,
294
+ -0.5395280122756958,
295
+ -0.13902321457862854,
296
+ 0.32027995586395264,
297
+ 0.6713051795959473,
298
+ -0.47778892517089844,
299
+ -0.0925600454211235,
300
+ -0.1357308328151703,
301
+ 0.0,
302
+ -0.14878930151462555,
303
+ 0.0,
304
+ 0.75
305
+ ],
306
+ "std": [
307
+ 0.0685795322060585,
308
+ 0.04785825312137604,
309
+ 0.07355451583862305,
310
+ 0.07865653932094574,
311
+ 0.06974292546510696,
312
+ 0.06802646815776825,
313
+ 0.07132686674594879,
314
+ 0.22695879638195038,
315
+ 0.11018200218677521,
316
+ 0.21475042402744293,
317
+ 0.15972517430782318,
318
+ 0.4049052894115448,
319
+ 0.2650735676288605,
320
+ 0.4734349250793457,
321
+ 0.0036633212585002184,
322
+ 0.049966294318437576,
323
+ 0.04872097074985504,
324
+ 0.0031518512405455112,
325
+ 0.05005797743797302,
326
+ 0.008245873264968395,
327
+ 0.0483190156519413,
328
+ 0.3140910267829895,
329
+ 0.1009896919131279,
330
+ 0.2423640936613083,
331
+ 0.4292946755886078,
332
+ 0.2999919056892395,
333
+ 0.15540601313114166,
334
+ 0.12780624628067017,
335
+ 0.0,
336
+ 0.013509782962501016,
337
+ 0.0,
338
+ 0.0
339
+ ],
340
+ "max": [
341
+ 0.2149999588727951,
342
+ 0.21600016951560974,
343
+ 0.36500000953674316,
344
+ 0.0,
345
+ 4.046002644031432e-09,
346
+ 0.0,
347
+ 1.0771045513835453e-11,
348
+ 0.609000027179718,
349
+ 0.3600001633167267,
350
+ 0.0,
351
+ 0.5249999761581421,
352
+ 1.3730000257492065,
353
+ 0.8119999766349792,
354
+ 1.7453292608261108,
355
+ 0.014000000432133675,
356
+ 0.2500004470348358,
357
+ 0.04610275477170944,
358
+ 0.020999999716877937,
359
+ 0.04600704088807106,
360
+ 0.006000000052154064,
361
+ 0.24200008809566498,
362
+ 0.00800000037997961,
363
+ 0.0010000000474974513,
364
+ 0.7419999837875366,
365
+ 1.6640000343322754,
366
+ 6.573184663238862e-09,
367
+ 0.37400001287460327,
368
+ 0.300999253988266,
369
+ 0.0,
370
+ 0.0,
371
+ 0.0,
372
+ 0.75
373
+ ],
374
+ "min": [
375
+ -0.210999995470047,
376
+ -0.11699992418289185,
377
+ -1.1206404693098193e-08,
378
+ -0.5680000185966492,
379
+ -0.29699981212615967,
380
+ -0.3440000116825104,
381
+ -0.37400001287460327,
382
+ -0.00800000037997961,
383
+ -0.004000000189989805,
384
+ -0.6610000133514404,
385
+ 0.003000000026077032,
386
+ 0.0,
387
+ 0.0010000000474974513,
388
+ 0.0,
389
+ -0.017999978736042976,
390
+ -0.04700015112757683,
391
+ -0.24899962544441223,
392
+ -0.0020000000949949026,
393
+ -0.2510001063346863,
394
+ -0.053999971598386765,
395
+ -0.045001156628131866,
396
+ -1.0800000429153442,
397
+ -0.3590024709701538,
398
+ -0.781000018119812,
399
+ 0.004999999888241291,
400
+ -1.4559999704360962,
401
+ -0.5649999976158142,
402
+ -0.46299999952316284,
403
+ 0.0,
404
+ -0.15000000596046448,
405
+ 0.0,
406
+ 0.75
407
+ ],
408
+ "q01": [
409
+ -0.1589999943971634,
410
+ -0.09774992614984512,
411
+ 0.0,
412
+ -0.41325000673532486,
413
+ -0.2677498906850815,
414
+ -0.3124999925494194,
415
+ -0.34200000762939453,
416
+ -0.00800000037997961,
417
+ -0.003000000026077032,
418
+ -0.6380000114440918,
419
+ 0.003000000026077032,
420
+ 0.0,
421
+ 0.0020000000949949026,
422
+ 0.0010000000474974513,
423
+ -0.009999881265684962,
424
+ -0.035998325794935226,
425
+ -0.197999507188797,
426
+ -0.0010000000474974513,
427
+ -0.20800021663308144,
428
+ -0.03874997515231371,
429
+ -0.03500000014901161,
430
+ -1.0429999828338623,
431
+ -0.35000214725732803,
432
+ -0.0007500000356230885,
433
+ 0.007000000216066837,
434
+ -1.0049999952316284,
435
+ -0.49900001287460327,
436
+ -0.45100000500679016,
437
+ 0.0,
438
+ -0.15000000596046448,
439
+ 0.0,
440
+ 0.75
441
+ ],
442
+ "q99": [
443
+ 0.17499994486570358,
444
+ 0.1735001765191555,
445
+ 0.29300010204315186,
446
+ 0.0,
447
+ 6.082890094405435e-11,
448
+ 0.0,
449
+ 1.7963793159155722e-12,
450
+ 0.6050000190734863,
451
+ 0.3409999907016754,
452
+ 0.0,
453
+ 0.4869999885559082,
454
+ 1.2330000400543213,
455
+ 0.800000011920929,
456
+ 1.604750007390976,
457
+ 0.010000981157645583,
458
+ 0.20900072157382965,
459
+ 0.03500000014901161,
460
+ 0.019749989733099937,
461
+ 0.035999998450279236,
462
+ 0.0040117253083735704,
463
+ 0.20200001820921898,
464
+ 0.00800000037997961,
465
+ 0.0010000000474974513,
466
+ 0.718999981880188,
467
+ 1.4559999704360962,
468
+ 0.0,
469
+ 0.22300000488758087,
470
+ 0.11599990725517273,
471
+ 0.0,
472
+ -0.15000000596046448,
473
+ 0.0,
474
+ 0.75
475
+ ]
476
+ },
477
+ "num_transitions": 12226,
478
+ "num_trajectories": 100
479
+ }
480
+ }
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:298aa06d1ce6c0a726d1563e32e911921ae8a1b2c25d8e2704033b067309b79d
3
+ size 8604557774
intervla-m1/simple/G1WholebodyTabletopGraspMP-v0/20260404_061207/summary.jsonl ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"steps": 10000}
2
+ {"steps": 20000}
3
+ {"steps": 30000}
4
+ {"steps": 40000}
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67da627690ea593698cdef6c9cfcb9ac9d4fc2ead212ce5e8ab32207c5f3ac8
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a0d31fc3b3e169e9203bc2ab8f6dedc4f6d99f1fd2e72df262c40f91860a54
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de0fe4f75203f1cb526a77956c73d2b8e2b0555c7c5adf8274f4110722269d9
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53cec1943ecb2f59b8872a892092652c3a1a44b7f5d37f9652748f040c064266
3
+ size 8604575530
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.json ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_id": "G1WholebodyXMovePick",
3
+ "run_root_dir": "runs/InternVLA/Checkpoints",
4
+ "seed": 42,
5
+ "trackers": [
6
+ "jsonl",
7
+ "wandb"
8
+ ],
9
+ "wandb_entity": "jliu530-soochow-university",
10
+ "wandb_project": "psi",
11
+ "is_debug": false,
12
+ "framework": {
13
+ "framework_py": "InternVLA-M1",
14
+ "qwenvl": {
15
+ "base_vlm": "Qwen/Qwen2.5-VL-3B-Instruct",
16
+ "attn_implementation": "flash_attention_2",
17
+ "vl_hidden_dim": 2048
18
+ },
19
+ "dino": {
20
+ "dino_backbone": "dinov2_vits14"
21
+ },
22
+ "layer_qformer": {
23
+ "qformer_end_layer": 37,
24
+ "qformer_start_layer": 36,
25
+ "num_query_tokens": 64,
26
+ "input_dim": 2048,
27
+ "ouptput_dim": 768,
28
+ "grad_scale": 0.5
29
+ },
30
+ "action_model": {
31
+ "action_model_type": "DiT-B",
32
+ "action_hidden_dim": 768,
33
+ "action_dim": 36,
34
+ "use_ema": false,
35
+ "future_action_window_size": 15,
36
+ "past_action_window_size": 0,
37
+ "repeated_diffusion_steps": 8
38
+ },
39
+ "fm_head_config": {
40
+ "input_embedding_dim": 1536,
41
+ "hidden_size": 1024,
42
+ "add_pos_embed": true,
43
+ "max_seq_len": 1024,
44
+ "action_dim": 36,
45
+ "future_action_window_size": 15,
46
+ "action_horizon": 16,
47
+ "past_action_window_size": 0,
48
+ "noise_beta_alpha": 1.5,
49
+ "noise_beta_beta": 1.0,
50
+ "noise_s": 0.999,
51
+ "num_timestep_buckets": 1000,
52
+ "num_inference_timesteps": 4,
53
+ "num_target_vision_tokens": 32,
54
+ "diffusion_model_cfg": {
55
+ "attention_head_dim": 48,
56
+ "cross_attention_dim": 2048,
57
+ "dropout": 0.2,
58
+ "final_dropout": true,
59
+ "interleave_self_attention": true,
60
+ "norm_type": "ada_norm",
61
+ "num_attention_heads": 32,
62
+ "num_layers": 16,
63
+ "output_dim": 1024,
64
+ "positional_embeddings": null
65
+ }
66
+ }
67
+ },
68
+ "datasets": {
69
+ "vlm_data": {
70
+ "dataset_py": "vlm_datasets",
71
+ "dataformat": "llava_json",
72
+ "dataset_use": "asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
73
+ "eval_dataset": "aokvqa_cauldron_llava_format",
74
+ "data_flatten": false,
75
+ "base_interval": 2,
76
+ "max_pixels": 50176,
77
+ "min_pixels": 784,
78
+ "model_max_length": 2048,
79
+ "model_type": "qwen2.5vl",
80
+ "per_device_batch_size": 4
81
+ },
82
+ "vla_data": {
83
+ "dataset_py": "lerobot_datasets",
84
+ "data_root_dir": "/hfm/jliu/simple/G1WholebodyXMovePick-v0",
85
+ "data_mix": "humanoid_",
86
+ "action_type": "abs_joints",
87
+ "CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
88
+ "CoT_answer": "bbox",
89
+ "default_image_resolution": [
90
+ 3,
91
+ 224,
92
+ 224
93
+ ],
94
+ "per_device_batch_size": 64,
95
+ "preload_all": true,
96
+ "load_all_data_for_training": true,
97
+ "obs": [
98
+ "image_0"
99
+ ],
100
+ "image_size": [
101
+ 224,
102
+ 224
103
+ ]
104
+ }
105
+ },
106
+ "trainer": {
107
+ "epochs": 100,
108
+ "max_train_steps": 40000,
109
+ "num_warmup_steps": 0,
110
+ "save_interval": 10000,
111
+ "eval_interval": 100,
112
+ "learning_rate": {
113
+ "base": 5e-05,
114
+ "qwen_vl_interface": 1e-05,
115
+ "action_model": 0.0001
116
+ },
117
+ "lr_scheduler_type": "cosine_with_min_lr",
118
+ "scheduler_specific_kwargs": {
119
+ "min_lr": 5e-07
120
+ },
121
+ "freeze_modules": "qwen_vl_interface,layer_qformer,dino_encoder,dino_pro",
122
+ "loss_scale": {
123
+ "vla": 1.0,
124
+ "vlm": 0.1
125
+ },
126
+ "pretrained_checkpoint": "/hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt",
127
+ "skip_reload_modules": "action_model",
128
+ "repeated_diffusion_steps": 4,
129
+ "max_grad_norm": 1.0,
130
+ "warmup_ratio": 0.1,
131
+ "weight_decay": 0.0,
132
+ "logging_frequency": 10,
133
+ "gradient_clipping": 1.0,
134
+ "gradient_accumulation_steps": 1,
135
+ "optimizer": {
136
+ "name": "AdamW",
137
+ "betas": [
138
+ 0.9,
139
+ 0.95
140
+ ],
141
+ "eps": 1e-08,
142
+ "weight_decay": 1e-08
143
+ },
144
+ "is_resume": false,
145
+ "resume_epoch": null,
146
+ "resume_step": null,
147
+ "enable_gradient_checkpointing": true,
148
+ "enable_mixed_precision_training": true
149
+ },
150
+ "output_dir": "runs/InternVLA/Checkpoints/G1WholebodyXMovePick/20260404_061741"
151
+ }
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/config.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_id: G1WholebodyXMovePick
2
+ run_root_dir: runs/InternVLA/Checkpoints
3
+ seed: 42
4
+ trackers:
5
+ - jsonl
6
+ - wandb
7
+ wandb_entity: jliu530-soochow-university
8
+ wandb_project: psi
9
+ is_debug: false
10
+ framework:
11
+ framework_py: InternVLA-M1
12
+ qwenvl:
13
+ base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
14
+ attn_implementation: flash_attention_2
15
+ vl_hidden_dim: 2048
16
+ dino:
17
+ dino_backbone: dinov2_vits14
18
+ layer_qformer:
19
+ qformer_end_layer: 37
20
+ qformer_start_layer: 36
21
+ num_query_tokens: 64
22
+ input_dim: 2048
23
+ ouptput_dim: 768
24
+ grad_scale: 0.5
25
+ action_model:
26
+ action_model_type: DiT-B
27
+ action_hidden_dim: 768
28
+ action_dim: 36
29
+ use_ema: false
30
+ future_action_window_size: 15
31
+ past_action_window_size: 0
32
+ repeated_diffusion_steps: 8
33
+ fm_head_config:
34
+ input_embedding_dim: 1536
35
+ hidden_size: 1024
36
+ add_pos_embed: true
37
+ max_seq_len: 1024
38
+ action_dim: 36
39
+ future_action_window_size: 15
40
+ action_horizon: 16
41
+ past_action_window_size: 0
42
+ noise_beta_alpha: 1.5
43
+ noise_beta_beta: 1.0
44
+ noise_s: 0.999
45
+ num_timestep_buckets: 1000
46
+ num_inference_timesteps: 4
47
+ num_target_vision_tokens: 32
48
+ diffusion_model_cfg:
49
+ attention_head_dim: 48
50
+ cross_attention_dim: 2048
51
+ dropout: 0.2
52
+ final_dropout: true
53
+ interleave_self_attention: true
54
+ norm_type: ada_norm
55
+ num_attention_heads: 32
56
+ num_layers: 16
57
+ output_dim: 1024
58
+ positional_embeddings: null
59
+ datasets:
60
+ vlm_data:
61
+ dataset_py: vlm_datasets
62
+ dataformat: llava_json
63
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
64
+ eval_dataset: aokvqa_cauldron_llava_format
65
+ data_flatten: false
66
+ base_interval: 2
67
+ max_pixels: 50176
68
+ min_pixels: 784
69
+ model_max_length: 2048
70
+ model_type: qwen2.5vl
71
+ per_device_batch_size: 4
72
+ vla_data:
73
+ dataset_py: lerobot_datasets
74
+ data_root_dir: /hfm/jliu/simple/G1WholebodyXMovePick-v0
75
+ data_mix: humanoid_
76
+ action_type: abs_joints
77
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
78
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
79
+ CoT_answer: bbox
80
+ default_image_resolution:
81
+ - 3
82
+ - 224
83
+ - 224
84
+ per_device_batch_size: 64
85
+ preload_all: true
86
+ load_all_data_for_training: true
87
+ obs:
88
+ - image_0
89
+ image_size:
90
+ - 224
91
+ - 224
92
+ trainer:
93
+ epochs: 100
94
+ max_train_steps: 40000
95
+ num_warmup_steps: 0
96
+ save_interval: 10000
97
+ eval_interval: 100
98
+ learning_rate:
99
+ base: 5.0e-05
100
+ qwen_vl_interface: 1.0e-05
101
+ action_model: 0.0001
102
+ lr_scheduler_type: cosine_with_min_lr
103
+ scheduler_specific_kwargs:
104
+ min_lr: 5.0e-07
105
+ freeze_modules: qwen_vl_interface,layer_qformer,dino_encoder,dino_pro
106
+ loss_scale:
107
+ vla: 1.0
108
+ vlm: 0.1
109
+ pretrained_checkpoint: /hfm/cache/checkpoints/InternVLA-M1-Pretrain-RT-1-Bridge/checkpoints/steps_50000_pytorch_model.pt
110
+ skip_reload_modules: action_model
111
+ repeated_diffusion_steps: 4
112
+ max_grad_norm: 1.0
113
+ warmup_ratio: 0.1
114
+ weight_decay: 0.0
115
+ logging_frequency: 10
116
+ gradient_clipping: 1.0
117
+ gradient_accumulation_steps: 1
118
+ optimizer:
119
+ name: AdamW
120
+ betas:
121
+ - 0.9
122
+ - 0.95
123
+ eps: 1.0e-08
124
+ weight_decay: 1.0e-08
125
+ is_resume: false
126
+ resume_epoch: null
127
+ resume_step: null
128
+ enable_gradient_checkpointing: true
129
+ enable_mixed_precision_training: true
130
+ output_dir: runs/InternVLA/Checkpoints/G1WholebodyXMovePick/20260404_061741
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/dataset_statistics.json ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ 0.0,
6
+ 0.0,
7
+ 0.0,
8
+ 0.0,
9
+ 0.0,
10
+ 0.0,
11
+ 0.0,
12
+ -0.08290933817625046,
13
+ -0.1160692349076271,
14
+ -0.1160692349076271,
15
+ 0.24872823059558868,
16
+ 0.24872823059558868,
17
+ 0.09948870539665222,
18
+ 0.24872823059558868,
19
+ -0.0202432032674551,
20
+ 0.19145731627941132,
21
+ 0.015744829550385475,
22
+ 0.19732625782489777,
23
+ -0.1048167273402214,
24
+ -0.1180705726146698,
25
+ 0.0800875574350357,
26
+ -0.023737892508506775,
27
+ -0.1952551156282425,
28
+ -0.07107722014188766,
29
+ 0.18714968860149384,
30
+ 0.07278095185756683,
31
+ -0.1969067007303238,
32
+ 0.13247987627983093,
33
+ -0.003939848393201828,
34
+ 0.08321992307901382,
35
+ -0.012255122885107994,
36
+ 0.7401692867279053,
37
+ 0.07785128802061081,
38
+ 0.0,
39
+ -0.004404395818710327,
40
+ 0.0
41
+ ],
42
+ "std": [
43
+ 0.0,
44
+ 0.0,
45
+ 0.0,
46
+ 0.0,
47
+ 0.0,
48
+ 0.0,
49
+ 0.0,
50
+ 0.18071416020393372,
51
+ 0.2529962658882141,
52
+ 0.2529962658882141,
53
+ 0.5421715378761292,
54
+ 0.5421715378761292,
55
+ 0.216846764087677,
56
+ 0.5421715378761292,
57
+ 0.06216248497366905,
58
+ 0.005301938857883031,
59
+ 0.141453355550766,
60
+ 0.15665654838085175,
61
+ 0.05677799880504608,
62
+ 0.11932738125324249,
63
+ 0.09939250349998474,
64
+ 0.07487308979034424,
65
+ 0.01068197004497035,
66
+ 0.1846880465745926,
67
+ 0.16723404824733734,
68
+ 0.1179094985127449,
69
+ 0.17774474620819092,
70
+ 0.21004262566566467,
71
+ 0.031044654548168182,
72
+ 0.04217095300555229,
73
+ 0.044379811733961105,
74
+ 0.00016928205135004158,
75
+ 0.17665114998817444,
76
+ 0.0,
77
+ 0.03681277483701706,
78
+ 0.0
79
+ ],
80
+ "max": [
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0,
86
+ 0.0,
87
+ 0.0,
88
+ 3.40585956201392e-19,
89
+ 4.519914941300404e-19,
90
+ 4.519914941300404e-19,
91
+ 1.5,
92
+ 1.5,
93
+ 0.6000000238418579,
94
+ 1.5,
95
+ 0.23390473425388336,
96
+ 0.6660271883010864,
97
+ 0.7131545543670654,
98
+ 0.6127181053161621,
99
+ 0.12901827692985535,
100
+ 0.23185482621192932,
101
+ 0.3763704001903534,
102
+ 0.2929672598838806,
103
+ -0.1900009959936142,
104
+ 0.3377506732940674,
105
+ 0.585555911064148,
106
+ 0.7287320494651794,
107
+ 0.2222127765417099,
108
+ 0.8800510764122009,
109
+ 0.14049682021141052,
110
+ 0.20396868884563446,
111
+ 0.30819037556648254,
112
+ 0.7400000095367432,
113
+ 0.5,
114
+ 0.0,
115
+ 0.18601855635643005,
116
+ 0.0
117
+ ],
118
+ "min": [
119
+ 0.0,
120
+ 0.0,
121
+ 0.0,
122
+ 0.0,
123
+ 0.0,
124
+ 0.0,
125
+ 0.0,
126
+ -0.5,
127
+ -0.699999988079071,
128
+ -0.699999988079071,
129
+ -8.807794347046262e-19,
130
+ -8.807794347046262e-19,
131
+ -3.4037360342851314e-19,
132
+ -8.807794347046262e-19,
133
+ -0.7234289646148682,
134
+ 0.1900009959936142,
135
+ -0.3478638231754303,
136
+ -0.24381141364574432,
137
+ -0.2871176600456238,
138
+ -0.46943801641464233,
139
+ -0.28036442399024963,
140
+ -0.28476205468177795,
141
+ -0.2781273424625397,
142
+ -0.6339197754859924,
143
+ -0.2889905273914337,
144
+ -0.2651921808719635,
145
+ -0.6309908628463745,
146
+ -0.19380050897598267,
147
+ -0.12375029176473618,
148
+ 0.004558231681585312,
149
+ -0.299873948097229,
150
+ 0.7400000095367432,
151
+ -5.537859719245743e-17,
152
+ 0.0,
153
+ -0.19462604820728302,
154
+ 0.0
155
+ ],
156
+ "q01": [
157
+ 0.0,
158
+ 0.0,
159
+ 0.0,
160
+ 0.0,
161
+ 0.0,
162
+ 0.0,
163
+ 0.0,
164
+ -0.5,
165
+ -0.699999988079071,
166
+ -0.699999988079071,
167
+ 0.0,
168
+ 0.0,
169
+ 0.0,
170
+ 0.0,
171
+ -0.20426521703600883,
172
+ 0.1900009959936142,
173
+ -0.259531612098217,
174
+ -0.07438816666603089,
175
+ -0.25566656738519666,
176
+ -0.36685342639684676,
177
+ -0.20514860033988952,
178
+ -0.19272020012140273,
179
+ -0.24566123276948928,
180
+ -0.5219609200954437,
181
+ -0.10887585021555424,
182
+ -0.17436543881893157,
183
+ -0.5553655999898911,
184
+ -0.16044148862361907,
185
+ -0.08815551772713662,
186
+ 0.022177401781082153,
187
+ -0.1136455524712801,
188
+ 0.7400000095367432,
189
+ 0.0,
190
+ 0.0,
191
+ -0.10952737107872963,
192
+ 0.0
193
+ ],
194
+ "q99": [
195
+ 0.0,
196
+ 0.0,
197
+ 0.0,
198
+ 0.0,
199
+ 0.0,
200
+ 0.0,
201
+ 0.0,
202
+ 0.0,
203
+ 0.0,
204
+ 0.0,
205
+ 1.5,
206
+ 1.5,
207
+ 0.6000000238418579,
208
+ 1.5,
209
+ 0.15601892367005352,
210
+ 0.19990646034479143,
211
+ 0.40586651742458346,
212
+ 0.5169547837972646,
213
+ 0.01386699410155416,
214
+ 0.13033131986856472,
215
+ 0.2422071608901024,
216
+ 0.18224790632724763,
217
+ -0.1900009959936142,
218
+ 0.23839011460542678,
219
+ 0.48448209166526796,
220
+ 0.3644275778532031,
221
+ 0.08767572224140227,
222
+ 0.6856733673810961,
223
+ 0.09890407033264646,
224
+ 0.16878983661532404,
225
+ 0.11360939003527204,
226
+ 0.7400000095367432,
227
+ 0.5,
228
+ 0.0,
229
+ 0.09405761912465121,
230
+ 0.0
231
+ ],
232
+ "mask": [
233
+ true,
234
+ true,
235
+ true,
236
+ true,
237
+ true,
238
+ true,
239
+ true,
240
+ true,
241
+ true,
242
+ true,
243
+ true,
244
+ true,
245
+ true,
246
+ true,
247
+ true,
248
+ true,
249
+ true,
250
+ true,
251
+ true,
252
+ true,
253
+ true,
254
+ true,
255
+ true,
256
+ true,
257
+ true,
258
+ true,
259
+ true,
260
+ true,
261
+ true,
262
+ true,
263
+ true,
264
+ true,
265
+ true,
266
+ true,
267
+ true,
268
+ true
269
+ ]
270
+ },
271
+ "state": {
272
+ "mean": [
273
+ 1.4836045920674223e-06,
274
+ 1.0606432624626905e-05,
275
+ -5.854836331309343e-07,
276
+ 2.950614089058945e-06,
277
+ 3.582050567274564e-07,
278
+ 9.370121915708296e-06,
279
+ 1.3081314591545379e-06,
280
+ -0.060894329100847244,
281
+ -0.025927798822522163,
282
+ -0.07437846064567566,
283
+ 0.045159339904785156,
284
+ 0.12926453351974487,
285
+ 0.08669889718294144,
286
+ 0.12420654296875,
287
+ 0.00843458529561758,
288
+ 0.17842410504817963,
289
+ 0.0010605790885165334,
290
+ 0.2700137794017792,
291
+ -0.09782520681619644,
292
+ -0.05658171325922012,
293
+ 0.07219633460044861,
294
+ 0.006632798817008734,
295
+ -0.1822047382593155,
296
+ -0.057714466005563736,
297
+ 0.26347753405570984,
298
+ 0.05908522754907608,
299
+ -0.13447149097919464,
300
+ 0.1320032924413681,
301
+ -0.004364927764981985,
302
+ 0.12081972509622574,
303
+ -0.01263909600675106,
304
+ 0.7401692867279053
305
+ ],
306
+ "std": [
307
+ 3.169461990637501e-07,
308
+ 0.00022020423784852028,
309
+ 4.793582775164396e-07,
310
+ 1.5518047803197987e-05,
311
+ 9.997207826017984e-07,
312
+ 1.3545039109885693e-05,
313
+ 8.281783721031388e-07,
314
+ 0.13058346509933472,
315
+ 0.0469207838177681,
316
+ 0.15300248563289642,
317
+ 0.10863375663757324,
318
+ 0.27733975648880005,
319
+ 0.15592682361602783,
320
+ 0.261216938495636,
321
+ 0.058701254427433014,
322
+ 0.008257574401795812,
323
+ 0.14031915366649628,
324
+ 0.1548788845539093,
325
+ 0.05737532675266266,
326
+ 0.12003903090953827,
327
+ 0.09705275297164917,
328
+ 0.06928782165050507,
329
+ 0.011870346963405609,
330
+ 0.1819697618484497,
331
+ 0.16280630230903625,
332
+ 0.11444099247455597,
333
+ 0.17878052592277527,
334
+ 0.2077128142118454,
335
+ 0.029482470825314522,
336
+ 0.0430903285741806,
337
+ 0.04431401938199997,
338
+ 0.00016928205135004158
339
+ ],
340
+ "max": [
341
+ 3.826543888862943e-06,
342
+ 0.003359275171533227,
343
+ 2.544531525927596e-06,
344
+ 0.00021755567286163568,
345
+ 3.435341568547301e-06,
346
+ 0.00019837530271615833,
347
+ 5.035403773945291e-06,
348
+ 7.077142640810052e-07,
349
+ 0.16438177227973938,
350
+ 6.031086172697542e-07,
351
+ 0.47953173518180847,
352
+ 1.3467339277267456,
353
+ 0.7186622619628906,
354
+ 1.1524261236190796,
355
+ 0.2516690492630005,
356
+ 0.39732399582862854,
357
+ 0.6492785811424255,
358
+ 0.6760621070861816,
359
+ 0.1240595132112503,
360
+ 0.22742627561092377,
361
+ 0.3518524467945099,
362
+ 0.31155094504356384,
363
+ -0.15275707840919495,
364
+ 0.3515203297138214,
365
+ 0.643481969833374,
366
+ 0.6658613681793213,
367
+ 0.2018662989139557,
368
+ 0.8597758412361145,
369
+ 0.13490912318229675,
370
+ 0.25929149985313416,
371
+ 0.31226828694343567,
372
+ 0.7400000095367432
373
+ ],
374
+ "min": [
375
+ 7.777451855872641e-07,
376
+ -0.00033337774220854044,
377
+ -5.059851446276298e-06,
378
+ -0.00017079990357160568,
379
+ -9.60247780312784e-06,
380
+ -0.00018899694259744138,
381
+ -7.104898486431921e-06,
382
+ -0.5265660881996155,
383
+ -0.35556095838546753,
384
+ -0.5981534123420715,
385
+ -0.00021067277702968568,
386
+ -8.959311230682943e-07,
387
+ -8.316740604641382e-06,
388
+ -1.4921358797437279e-06,
389
+ -0.3891601860523224,
390
+ 0.150077685713768,
391
+ -0.3817761540412903,
392
+ -0.15058015286922455,
393
+ -0.2737894654273987,
394
+ -0.4105795621871948,
395
+ -0.2672588527202606,
396
+ -0.21800069510936737,
397
+ -0.26531729102134705,
398
+ -0.6129422783851624,
399
+ -0.2084185779094696,
400
+ -0.25410670042037964,
401
+ -0.5689104199409485,
402
+ -0.1778532713651657,
403
+ -0.09933578222990036,
404
+ 0.03510970249772072,
405
+ -0.30416738986968994,
406
+ 0.7400000095367432
407
+ ],
408
+ "q01": [
409
+ 1.1298135677861865e-06,
410
+ -2.8690914095932385e-05,
411
+ -2.510851429633476e-06,
412
+ -5.510928640433116e-05,
413
+ -2.107983063979191e-06,
414
+ -3.907491034624399e-05,
415
+ -6.786238512290763e-07,
416
+ -0.4734627178311348,
417
+ -0.23477528855204582,
418
+ -0.5642639362812042,
419
+ -1.4859102020636782e-06,
420
+ -5.656483683225133e-08,
421
+ -4.254143749449213e-07,
422
+ -1.0846991962409903e-07,
423
+ -0.1681899881362915,
424
+ 0.15905899047851563,
425
+ -0.27387906223535535,
426
+ 0.012387464968487623,
427
+ -0.2487707431614399,
428
+ -0.3086726626753807,
429
+ -0.20600704431533814,
430
+ -0.1558936455845833,
431
+ -0.22803550645709036,
432
+ -0.5040199017524719,
433
+ -0.014566264236345886,
434
+ -0.1838426996767521,
435
+ -0.5003699988126755,
436
+ -0.15275642275810242,
437
+ -0.07760205484926701,
438
+ 0.0615559097379446,
439
+ -0.1145944294333458,
440
+ 0.7400000095367432
441
+ ],
442
+ "q99": [
443
+ 3.103286051100445e-06,
444
+ -1.557816533477306e-06,
445
+ -2.3083463048578747e-07,
446
+ 4.52247674274986e-06,
447
+ 7.388170627109503e-07,
448
+ 1.0908858203038109e-05,
449
+ 1.6644753543459957e-06,
450
+ 2.0894420146078135e-07,
451
+ 0.058002640753984945,
452
+ 1.376792984331146e-07,
453
+ 0.42478282511234283,
454
+ 1.1208237993717194,
455
+ 0.6207123923301697,
456
+ 1.0326318240165715,
457
+ 0.17270984217524538,
458
+ 0.19035052344202996,
459
+ 0.38950503557920496,
460
+ 0.584475804567337,
461
+ 0.013913449570536614,
462
+ 0.18584083914756777,
463
+ 0.23014518827199942,
464
+ 0.19788369297981334,
465
+ -0.16056184038519858,
466
+ 0.24191985771059998,
467
+ 0.5504173463582996,
468
+ 0.3378682091832161,
469
+ 0.12751864939928056,
470
+ 0.6823570990562439,
471
+ 0.08747987322509322,
472
+ 0.21035773798823357,
473
+ 0.10962559245526798,
474
+ 0.7400000095367432
475
+ ]
476
+ },
477
+ "num_transitions": 20704,
478
+ "num_trajectories": 99
479
+ }
480
+ }
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5efbb6c61d2db01384c4db9d06b017fe4f79e06212c42e2c32a63e3e4bd0cdd
3
+ size 8604557774
intervla-m1/simple/G1WholebodyXMovePickTeleop-v0/20260404_061741/summary.jsonl ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"steps": 10000}
2
+ {"steps": 20000}
3
+ {"steps": 30000}
4
+ {"steps": 40000}