Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- config.json +121 -0
- config.yaml +104 -0
- dataset_statistics.json +140 -0
- final_model/pytorch_model.pt +3 -0
- run_qwenlatent_vla.sh +27 -0
- summary.jsonl +2 -0
- wandb/wandb/debug-internal.log +12 -0
- wandb/wandb/debug.log +0 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/files/config.yaml +114 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/files/output.log +0 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/files/requirements.txt +182 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/files/wandb-metadata.json +97 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/files/wandb-summary.json +1 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/logs/debug-core.log +19 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/logs/debug-internal.log +12 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/logs/debug.log +0 -0
- wandb/wandb/run-20260414_022133-bxpz7wpp/run-bxpz7wpp.wandb +3 -0
.gitattributes
CHANGED
|
@@ -168,3 +168,5 @@ videos/chunk-000/wrist_image_left/episode_000076.mp4 filter=lfs diff=lfs merge=l
|
|
| 168 |
videos/chunk-000/wrist_image_left/episode_000077.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 169 |
videos/chunk-000/wrist_image_left/episode_000078.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 170 |
videos/chunk-000/wrist_image_left/episode_000079.mp4 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 168 |
videos/chunk-000/wrist_image_left/episode_000077.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 169 |
videos/chunk-000/wrist_image_left/episode_000078.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 170 |
videos/chunk-000/wrist_image_left/episode_000079.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
final_model/pytorch_model.pt filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
wandb/wandb/run-20260414_022133-bxpz7wpp/run-bxpz7wpp.wandb filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"run_id": "0413_QwenLatent_realworld_actionstate_10k",
|
| 3 |
+
"run_root_dir": "./runs",
|
| 4 |
+
"seed": 42,
|
| 5 |
+
"trackers": [
|
| 6 |
+
"jsonl",
|
| 7 |
+
"wandb"
|
| 8 |
+
],
|
| 9 |
+
"wandb_entity": "timsty",
|
| 10 |
+
"wandb_project": "vla_jepa",
|
| 11 |
+
"is_debug": false,
|
| 12 |
+
"framework": {
|
| 13 |
+
"name": "QwenLatent",
|
| 14 |
+
"qwenvl": {
|
| 15 |
+
"base_vlm": "/mnt/data/fangyu/model/Qwen/Qwen3-VL-2B-Instruct",
|
| 16 |
+
"attn_implementation": "flash_attention_2",
|
| 17 |
+
"vl_hidden_dim": 2048,
|
| 18 |
+
"num_data_tokens": 32
|
| 19 |
+
},
|
| 20 |
+
"action_model": {
|
| 21 |
+
"ckpt_path": "/mnt/data/fangyu/code/reward_new/runs/0303_Action_9tasks_actionstate_fixchunk15/final_model/pytorch_model.pt",
|
| 22 |
+
"action_size": 37,
|
| 23 |
+
"state_size": 74,
|
| 24 |
+
"use_state": "${datasets.vla_data.state_use_action_chunk}",
|
| 25 |
+
"hidden_size": 1024,
|
| 26 |
+
"intermediate_size": 3072,
|
| 27 |
+
"dataset_vocab_size": 256,
|
| 28 |
+
"num_data_tokens": 32,
|
| 29 |
+
"num_t_samples": 4,
|
| 30 |
+
"min_action_len": 5,
|
| 31 |
+
"num_encoder_layers": 28,
|
| 32 |
+
"num_decoder_layers": 28,
|
| 33 |
+
"num_attention_heads": 16,
|
| 34 |
+
"num_key_value_heads": 8,
|
| 35 |
+
"head_dim": 128,
|
| 36 |
+
"max_position_embeddings": 2048,
|
| 37 |
+
"max_action_chunk_size": 50,
|
| 38 |
+
"rms_norm_eps": 1e-06,
|
| 39 |
+
"attention_dropout": 0.0,
|
| 40 |
+
"use_vae_reparameterization": false,
|
| 41 |
+
"use_ema": false,
|
| 42 |
+
"chunk_size": "${datasets.vla_data.chunk_size}",
|
| 43 |
+
"loss_mode": "full",
|
| 44 |
+
"qwen3_pretrained_name_or_path": "/mnt/data/fangyu/model/Qwen/Qwen3-0.6B"
|
| 45 |
+
}
|
| 46 |
+
},
|
| 47 |
+
"datasets": {
|
| 48 |
+
"vla_data": {
|
| 49 |
+
"dataset_py": "lerobot_datasets",
|
| 50 |
+
"data_root_dir": "/mnt/data/fangyu/dataset/IPEC-COMMUNITY",
|
| 51 |
+
"data_mix": "real_world_4tasks",
|
| 52 |
+
"CoT_prompt": "Task: {instruction}. What are the next 15 actions to take?",
|
| 53 |
+
"default_image_resolution": [
|
| 54 |
+
3,
|
| 55 |
+
224,
|
| 56 |
+
224
|
| 57 |
+
],
|
| 58 |
+
"per_device_batch_size": 32,
|
| 59 |
+
"load_all_data_for_training": true,
|
| 60 |
+
"obs": [
|
| 61 |
+
"image_0"
|
| 62 |
+
],
|
| 63 |
+
"image_size": [
|
| 64 |
+
224,
|
| 65 |
+
224
|
| 66 |
+
],
|
| 67 |
+
"video_backend": "torchcodec",
|
| 68 |
+
"load_video": true,
|
| 69 |
+
"chunk_size": 15,
|
| 70 |
+
"state_use_action_chunk": true,
|
| 71 |
+
"num_history_steps": 0,
|
| 72 |
+
"include_state": "${datasets.vla_data.state_use_action_chunk}"
|
| 73 |
+
}
|
| 74 |
+
},
|
| 75 |
+
"trainer": {
|
| 76 |
+
"epochs": 100,
|
| 77 |
+
"max_train_steps": 10000,
|
| 78 |
+
"num_warmup_steps": 1000,
|
| 79 |
+
"num_stable_steps": 0,
|
| 80 |
+
"mode": "freeze_action_encoder_decay_aux_loss",
|
| 81 |
+
"loss_weights_decay_steps": 1000,
|
| 82 |
+
"save_interval": 5000,
|
| 83 |
+
"eval_interval": 50,
|
| 84 |
+
"max_checkpoints_to_keep": 20,
|
| 85 |
+
"learning_rate": {
|
| 86 |
+
"base": 2.5e-05,
|
| 87 |
+
"qwen_vl_interface": 2.5e-05,
|
| 88 |
+
"action_model": 2.5e-05
|
| 89 |
+
},
|
| 90 |
+
"lr_scheduler_type": "warmup_stable_cosine",
|
| 91 |
+
"scheduler_specific_kwargs": {
|
| 92 |
+
"min_lr_ratio": 0.001
|
| 93 |
+
},
|
| 94 |
+
"freeze_modules": "",
|
| 95 |
+
"loss_scale": {
|
| 96 |
+
"align_loss": 1.0,
|
| 97 |
+
"recon_loss": 1.0,
|
| 98 |
+
"predict_loss": 1.0
|
| 99 |
+
},
|
| 100 |
+
"warmup_ratio": 0.1,
|
| 101 |
+
"weight_decay": 0.0,
|
| 102 |
+
"logging_frequency": 10,
|
| 103 |
+
"gradient_clipping": 5.0,
|
| 104 |
+
"gradient_accumulation_steps": 1,
|
| 105 |
+
"optimizer": {
|
| 106 |
+
"name": "AdamW",
|
| 107 |
+
"betas": [
|
| 108 |
+
0.9,
|
| 109 |
+
0.95
|
| 110 |
+
],
|
| 111 |
+
"eps": 1e-08,
|
| 112 |
+
"weight_decay": 1e-08
|
| 113 |
+
},
|
| 114 |
+
"is_resume": false,
|
| 115 |
+
"resume_epoch": null,
|
| 116 |
+
"resume_step": null,
|
| 117 |
+
"enable_gradient_checkpointing": true,
|
| 118 |
+
"enable_mixed_precision_training": true
|
| 119 |
+
},
|
| 120 |
+
"output_dir": "./runs/0413_QwenLatent_realworld_actionstate_10k"
|
| 121 |
+
}
|
config.yaml
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run_id: 0413_QwenLatent_realworld_actionstate_10k
|
| 2 |
+
run_root_dir: ./runs
|
| 3 |
+
seed: 42
|
| 4 |
+
trackers:
|
| 5 |
+
- jsonl
|
| 6 |
+
- wandb
|
| 7 |
+
wandb_entity: timsty
|
| 8 |
+
wandb_project: vla_jepa
|
| 9 |
+
is_debug: false
|
| 10 |
+
framework:
|
| 11 |
+
name: QwenLatent
|
| 12 |
+
qwenvl:
|
| 13 |
+
base_vlm: /mnt/data/fangyu/model/Qwen/Qwen3-VL-2B-Instruct
|
| 14 |
+
attn_implementation: flash_attention_2
|
| 15 |
+
vl_hidden_dim: 2048
|
| 16 |
+
num_data_tokens: 32
|
| 17 |
+
action_model:
|
| 18 |
+
ckpt_path: /mnt/data/fangyu/code/reward_new/runs/0303_Action_9tasks_actionstate_fixchunk15/final_model/pytorch_model.pt
|
| 19 |
+
action_size: 37
|
| 20 |
+
state_size: 74
|
| 21 |
+
use_state: ${datasets.vla_data.state_use_action_chunk}
|
| 22 |
+
hidden_size: 1024
|
| 23 |
+
intermediate_size: 3072
|
| 24 |
+
dataset_vocab_size: 256
|
| 25 |
+
num_data_tokens: 32
|
| 26 |
+
num_t_samples: 4
|
| 27 |
+
min_action_len: 5
|
| 28 |
+
num_encoder_layers: 28
|
| 29 |
+
num_decoder_layers: 28
|
| 30 |
+
num_attention_heads: 16
|
| 31 |
+
num_key_value_heads: 8
|
| 32 |
+
head_dim: 128
|
| 33 |
+
max_position_embeddings: 2048
|
| 34 |
+
max_action_chunk_size: 50
|
| 35 |
+
rms_norm_eps: 1.0e-06
|
| 36 |
+
attention_dropout: 0.0
|
| 37 |
+
use_vae_reparameterization: false
|
| 38 |
+
use_ema: false
|
| 39 |
+
chunk_size: ${datasets.vla_data.chunk_size}
|
| 40 |
+
loss_mode: full
|
| 41 |
+
qwen3_pretrained_name_or_path: /mnt/data/fangyu/model/Qwen/Qwen3-0.6B
|
| 42 |
+
datasets:
|
| 43 |
+
vla_data:
|
| 44 |
+
dataset_py: lerobot_datasets
|
| 45 |
+
data_root_dir: /mnt/data/fangyu/dataset/IPEC-COMMUNITY
|
| 46 |
+
data_mix: real_world_4tasks
|
| 47 |
+
CoT_prompt: 'Task: {instruction}. What are the next 15 actions to take?'
|
| 48 |
+
default_image_resolution:
|
| 49 |
+
- 3
|
| 50 |
+
- 224
|
| 51 |
+
- 224
|
| 52 |
+
per_device_batch_size: 32
|
| 53 |
+
load_all_data_for_training: true
|
| 54 |
+
obs:
|
| 55 |
+
- image_0
|
| 56 |
+
image_size:
|
| 57 |
+
- 224
|
| 58 |
+
- 224
|
| 59 |
+
video_backend: torchcodec
|
| 60 |
+
load_video: true
|
| 61 |
+
chunk_size: 15
|
| 62 |
+
state_use_action_chunk: true
|
| 63 |
+
num_history_steps: 0
|
| 64 |
+
include_state: ${datasets.vla_data.state_use_action_chunk}
|
| 65 |
+
trainer:
|
| 66 |
+
epochs: 100
|
| 67 |
+
max_train_steps: 10000
|
| 68 |
+
num_warmup_steps: 1000
|
| 69 |
+
num_stable_steps: 0
|
| 70 |
+
mode: freeze_action_encoder_decay_aux_loss
|
| 71 |
+
loss_weights_decay_steps: 1000
|
| 72 |
+
save_interval: 5000
|
| 73 |
+
eval_interval: 50
|
| 74 |
+
max_checkpoints_to_keep: 20
|
| 75 |
+
learning_rate:
|
| 76 |
+
base: 2.5e-05
|
| 77 |
+
qwen_vl_interface: 2.5e-05
|
| 78 |
+
action_model: 2.5e-05
|
| 79 |
+
lr_scheduler_type: warmup_stable_cosine
|
| 80 |
+
scheduler_specific_kwargs:
|
| 81 |
+
min_lr_ratio: 0.001
|
| 82 |
+
freeze_modules: ''
|
| 83 |
+
loss_scale:
|
| 84 |
+
align_loss: 1.0
|
| 85 |
+
recon_loss: 1.0
|
| 86 |
+
predict_loss: 1.0
|
| 87 |
+
warmup_ratio: 0.1
|
| 88 |
+
weight_decay: 0.0
|
| 89 |
+
logging_frequency: 10
|
| 90 |
+
gradient_clipping: 5.0
|
| 91 |
+
gradient_accumulation_steps: 1
|
| 92 |
+
optimizer:
|
| 93 |
+
name: AdamW
|
| 94 |
+
betas:
|
| 95 |
+
- 0.9
|
| 96 |
+
- 0.95
|
| 97 |
+
eps: 1.0e-08
|
| 98 |
+
weight_decay: 1.0e-08
|
| 99 |
+
is_resume: false
|
| 100 |
+
resume_epoch: null
|
| 101 |
+
resume_step: null
|
| 102 |
+
enable_gradient_checkpointing: true
|
| 103 |
+
enable_mixed_precision_training: true
|
| 104 |
+
output_dir: ./runs/0413_QwenLatent_realworld_actionstate_10k
|
dataset_statistics.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"real_world_franka": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.012583610601723194,
|
| 6 |
+
0.06423042714595795,
|
| 7 |
+
-0.022138886153697968,
|
| 8 |
+
0.03794120252132416,
|
| 9 |
+
0.0025389082729816437,
|
| 10 |
+
0.03217320889234543,
|
| 11 |
+
-0.0031000676099210978,
|
| 12 |
+
0.33186694979667664
|
| 13 |
+
],
|
| 14 |
+
"std": [
|
| 15 |
+
0.08593875914812088,
|
| 16 |
+
0.1998993456363678,
|
| 17 |
+
0.07887445390224457,
|
| 18 |
+
0.17018188536167145,
|
| 19 |
+
0.14526863396167755,
|
| 20 |
+
0.18383915722370148,
|
| 21 |
+
0.2331046611070633,
|
| 22 |
+
0.42264530062675476
|
| 23 |
+
],
|
| 24 |
+
"max": [
|
| 25 |
+
0.26614895462989807,
|
| 26 |
+
0.6854990124702454,
|
| 27 |
+
0.3884388208389282,
|
| 28 |
+
0.7887691855430603,
|
| 29 |
+
0.6867426037788391,
|
| 30 |
+
0.691353440284729,
|
| 31 |
+
0.7094700336456299,
|
| 32 |
+
1.0
|
| 33 |
+
],
|
| 34 |
+
"min": [
|
| 35 |
+
-0.40937480330467224,
|
| 36 |
+
-0.7861437797546387,
|
| 37 |
+
-0.3629209101200104,
|
| 38 |
+
-0.6626467704772949,
|
| 39 |
+
-0.47793203592300415,
|
| 40 |
+
-0.6568831205368042,
|
| 41 |
+
-0.9779152870178223,
|
| 42 |
+
0.0
|
| 43 |
+
],
|
| 44 |
+
"q01": [
|
| 45 |
+
-0.16525722086429595,
|
| 46 |
+
-0.4416676115989685,
|
| 47 |
+
-0.20630157992243767,
|
| 48 |
+
-0.4057323223352432,
|
| 49 |
+
-0.26986045092344285,
|
| 50 |
+
-0.4521863567829132,
|
| 51 |
+
-0.5359487313032151,
|
| 52 |
+
0.0
|
| 53 |
+
],
|
| 54 |
+
"q99": [
|
| 55 |
+
0.2025589363276954,
|
| 56 |
+
0.5160180038213726,
|
| 57 |
+
0.17172235593199692,
|
| 58 |
+
0.5288003307580939,
|
| 59 |
+
0.35574106454849197,
|
| 60 |
+
0.37829612225294074,
|
| 61 |
+
0.43210739821195593,
|
| 62 |
+
1.0
|
| 63 |
+
],
|
| 64 |
+
"mask": [
|
| 65 |
+
true,
|
| 66 |
+
true,
|
| 67 |
+
true,
|
| 68 |
+
true,
|
| 69 |
+
true,
|
| 70 |
+
true,
|
| 71 |
+
true,
|
| 72 |
+
false
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
"state": {
|
| 76 |
+
"mean": [
|
| 77 |
+
0.008660320192575455,
|
| 78 |
+
-0.10073422640562057,
|
| 79 |
+
0.034581538289785385,
|
| 80 |
+
-2.441179037094116,
|
| 81 |
+
-0.014582180418074131,
|
| 82 |
+
2.354743003845215,
|
| 83 |
+
0.08918069303035736,
|
| 84 |
+
0.2804732322692871
|
| 85 |
+
],
|
| 86 |
+
"std": [
|
| 87 |
+
0.08137225359678268,
|
| 88 |
+
0.31287872791290283,
|
| 89 |
+
0.1209535077214241,
|
| 90 |
+
0.26322728395462036,
|
| 91 |
+
0.11927197873592377,
|
| 92 |
+
0.25537094473838806,
|
| 93 |
+
0.345912903547287,
|
| 94 |
+
0.3839872479438782
|
| 95 |
+
],
|
| 96 |
+
"max": [
|
| 97 |
+
0.2817862033843994,
|
| 98 |
+
0.5318871736526489,
|
| 99 |
+
0.4999081492424011,
|
| 100 |
+
-1.5832326412200928,
|
| 101 |
+
0.5537019968032837,
|
| 102 |
+
2.8958961963653564,
|
| 103 |
+
1.43259859085083,
|
| 104 |
+
0.9867841601371765
|
| 105 |
+
],
|
| 106 |
+
"min": [
|
| 107 |
+
-0.24912123382091522,
|
| 108 |
+
-1.0375385284423828,
|
| 109 |
+
-0.3488052189350128,
|
| 110 |
+
-2.819493055343628,
|
| 111 |
+
-0.45004919171333313,
|
| 112 |
+
1.3950575590133667,
|
| 113 |
+
-0.6863359808921814,
|
| 114 |
+
0.0
|
| 115 |
+
],
|
| 116 |
+
"q01": [
|
| 117 |
+
-0.15030207633972167,
|
| 118 |
+
-0.8516555172204971,
|
| 119 |
+
-0.20749431177973748,
|
| 120 |
+
-2.7650132966041565,
|
| 121 |
+
-0.31988351672887805,
|
| 122 |
+
1.625695208311081,
|
| 123 |
+
-0.4866442787647247,
|
| 124 |
+
0.0
|
| 125 |
+
],
|
| 126 |
+
"q99": [
|
| 127 |
+
0.22037882000207856,
|
| 128 |
+
0.45646974742412555,
|
| 129 |
+
0.4068581852316856,
|
| 130 |
+
-1.7810025322437288,
|
| 131 |
+
0.30561310172080935,
|
| 132 |
+
2.8368647360801695,
|
| 133 |
+
1.2636380982398987,
|
| 134 |
+
0.9867841601371765
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
"num_transitions": 16264,
|
| 138 |
+
"num_trajectories": 80
|
| 139 |
+
}
|
| 140 |
+
}
|
final_model/pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ef6649092a14b8c70f7aa9c293b5e59be3f40867392cedeebc5ba515b7704d4
|
| 3 |
+
size 6959082408
|
run_qwenlatent_vla.sh
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#export NCCL_SOCKET_IFNAME=bond0
|
| 2 |
+
#export NCCL_IB_HCA=mlx5_2,mlx5_3
|
| 3 |
+
|
| 4 |
+
export NCCL_BLOCKING_WAIT=1
|
| 5 |
+
export NCCL_ASYNC_ERROR_HANDLING=1
|
| 6 |
+
export NCCL_TIMEOUT=1000 # timeout in seconds (1000 s is about 17 minutes; use 3600 for 1 hour)
|
| 7 |
+
export CUDA_VISIBLE_DEVICES=5,6,7
|
| 8 |
+
|
| 9 |
+
# === Please modify the following paths according to your environment ===
|
| 10 |
+
###########################################################################################
|
| 11 |
+
run_root_dir=./runs
|
| 12 |
+
run_id=0413_QwenLatent_realworld_actionstate_10k
|
| 13 |
+
###########################################################################################
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
output_dir=${run_root_dir}/${run_id}
|
| 17 |
+
mkdir -p ${output_dir}
|
| 18 |
+
# copy this script to the output dir
|
| 19 |
+
cp $0 ${output_dir}/
|
| 20 |
+
|
| 21 |
+
accelerate launch \
|
| 22 |
+
--config_file ./starVLA/config/deepseeds/deepspeed_zero2.yaml \
|
| 23 |
+
--num_processes 3 \
|
| 24 |
+
starVLA/training/train_qwenlatent.py \
|
| 25 |
+
--config_yaml ./starVLA/config/training/starvla_train_qwenlatent_oxe.yaml \
|
| 26 |
+
--run_root_dir ${run_root_dir} \
|
| 27 |
+
--run_id ${run_id} \
|
summary.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"steps": 5000}
|
| 2 |
+
{"steps": 10000}
|
wandb/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-14T02:21:33.355536404+08:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"}
|
| 2 |
+
{"time":"2026-04-14T02:21:34.395602709+08:00","level":"INFO","msg":"stream: created new stream","id":"bxpz7wpp"}
|
| 3 |
+
{"time":"2026-04-14T02:21:34.395675616+08:00","level":"INFO","msg":"handler: started","stream_id":"bxpz7wpp"}
|
| 4 |
+
{"time":"2026-04-14T02:21:34.395759472+08:00","level":"INFO","msg":"stream: started","id":"bxpz7wpp"}
|
| 5 |
+
{"time":"2026-04-14T02:21:34.395778643+08:00","level":"INFO","msg":"writer: started","stream_id":"bxpz7wpp"}
|
| 6 |
+
{"time":"2026-04-14T02:21:34.395777681+08:00","level":"INFO","msg":"sender: started","stream_id":"bxpz7wpp"}
|
| 7 |
+
{"time":"2026-04-14T05:45:18.607727955+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 8 |
+
{"time":"2026-04-14T05:45:19.066205018+08:00","level":"INFO","msg":"handler: operation stats","stats":{}}
|
| 9 |
+
{"time":"2026-04-14T05:45:19.069402851+08:00","level":"INFO","msg":"stream: closing","id":"bxpz7wpp"}
|
| 10 |
+
{"time":"2026-04-14T05:45:19.069413103+08:00","level":"INFO","msg":"handler: closed","stream_id":"bxpz7wpp"}
|
| 11 |
+
{"time":"2026-04-14T05:45:19.069468828+08:00","level":"INFO","msg":"sender: closed","stream_id":"bxpz7wpp"}
|
| 12 |
+
{"time":"2026-04-14T05:45:19.069481245+08:00","level":"INFO","msg":"stream: closed","id":"bxpz7wpp"}
|
wandb/wandb/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260414_022133-bxpz7wpp/files/config.yaml
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.24.1
|
| 4 |
+
e:
|
| 5 |
+
uo6zd6ohtpiom84wt0w6ftf3i3ceif9q:
|
| 6 |
+
args:
|
| 7 |
+
- --config_yaml
|
| 8 |
+
- ./starVLA/config/training/starvla_train_qwenlatent_oxe.yaml
|
| 9 |
+
- --run_root_dir
|
| 10 |
+
- ./runs
|
| 11 |
+
- --run_id
|
| 12 |
+
- 0413_QwenLatent_realworld_actionstate_10k
|
| 13 |
+
codePath: starVLA/training/train_qwenlatent.py
|
| 14 |
+
codePathLocal: starVLA/training/train_qwenlatent.py
|
| 15 |
+
cpu_count: 96
|
| 16 |
+
cpu_count_logical: 192
|
| 17 |
+
cudaVersion: "12.8"
|
| 18 |
+
disk:
|
| 19 |
+
/:
|
| 20 |
+
total: "899505709056"
|
| 21 |
+
used: "98509094912"
|
| 22 |
+
email: 2023000137@ruc.edu.cn
|
| 23 |
+
executable: /mnt/data/.cache/conda/envs/vla_2/bin/python3.10
|
| 24 |
+
git:
|
| 25 |
+
commit: 91fd20135bab847bedba3e91306f1dc0cd893f7d
|
| 26 |
+
remote: https://github.com/Timsty1/LearnLatent.git
|
| 27 |
+
gpu: NVIDIA H200
|
| 28 |
+
gpu_count: 8
|
| 29 |
+
gpu_nvidia:
|
| 30 |
+
- architecture: Hopper
|
| 31 |
+
cudaCores: 16896
|
| 32 |
+
memoryTotal: "150754820096"
|
| 33 |
+
name: NVIDIA H200
|
| 34 |
+
uuid: GPU-32897fc1-464e-377b-127c-a58f6ba4c23b
|
| 35 |
+
- architecture: Hopper
|
| 36 |
+
cudaCores: 16896
|
| 37 |
+
memoryTotal: "150754820096"
|
| 38 |
+
name: NVIDIA H200
|
| 39 |
+
uuid: GPU-4326c728-b2ce-8d95-6a91-941eafe68404
|
| 40 |
+
- architecture: Hopper
|
| 41 |
+
cudaCores: 16896
|
| 42 |
+
memoryTotal: "150754820096"
|
| 43 |
+
name: NVIDIA H200
|
| 44 |
+
uuid: GPU-e7d38e6b-4b25-8aa8-d979-92f263aa5328
|
| 45 |
+
- architecture: Hopper
|
| 46 |
+
cudaCores: 16896
|
| 47 |
+
memoryTotal: "150754820096"
|
| 48 |
+
name: NVIDIA H200
|
| 49 |
+
uuid: GPU-8859353b-14e4-858f-e160-00b3496ea675
|
| 50 |
+
- architecture: Hopper
|
| 51 |
+
cudaCores: 16896
|
| 52 |
+
memoryTotal: "150754820096"
|
| 53 |
+
name: NVIDIA H200
|
| 54 |
+
uuid: GPU-f02f40c7-5f98-9f26-b47e-dff42bcf434a
|
| 55 |
+
- architecture: Hopper
|
| 56 |
+
cudaCores: 16896
|
| 57 |
+
memoryTotal: "150754820096"
|
| 58 |
+
name: NVIDIA H200
|
| 59 |
+
uuid: GPU-f7c80aa8-96b1-c6d6-76c0-115bd0b4167f
|
| 60 |
+
- architecture: Hopper
|
| 61 |
+
cudaCores: 16896
|
| 62 |
+
memoryTotal: "150754820096"
|
| 63 |
+
name: NVIDIA H200
|
| 64 |
+
uuid: GPU-67db85bd-78aa-c45d-2326-17fa8c96ab62
|
| 65 |
+
- architecture: Hopper
|
| 66 |
+
cudaCores: 16896
|
| 67 |
+
memoryTotal: "150754820096"
|
| 68 |
+
name: NVIDIA H200
|
| 69 |
+
uuid: GPU-ed16df5b-9407-57b2-8520-c76bd326bcb7
|
| 70 |
+
host: 10-116-218-71
|
| 71 |
+
memory:
|
| 72 |
+
total: "2164195033088"
|
| 73 |
+
os: Linux-5.15.0-113-generic-x86_64-with-glibc2.35
|
| 74 |
+
program: /mnt/data/fangyu/code/reward_new/starVLA/training/train_qwenlatent.py
|
| 75 |
+
python: CPython 3.10.0
|
| 76 |
+
root: ./runs/0413_QwenLatent_realworld_actionstate_10k/wandb
|
| 77 |
+
startedAt: "2026-04-13T18:21:33.063401Z"
|
| 78 |
+
writerId: uo6zd6ohtpiom84wt0w6ftf3i3ceif9q
|
| 79 |
+
m: []
|
| 80 |
+
python_version: 3.10.0
|
| 81 |
+
t:
|
| 82 |
+
"1":
|
| 83 |
+
- 1
|
| 84 |
+
- 5
|
| 85 |
+
- 11
|
| 86 |
+
- 12
|
| 87 |
+
- 41
|
| 88 |
+
- 49
|
| 89 |
+
- 53
|
| 90 |
+
- 63
|
| 91 |
+
- 71
|
| 92 |
+
- 80
|
| 93 |
+
- 83
|
| 94 |
+
"2":
|
| 95 |
+
- 1
|
| 96 |
+
- 5
|
| 97 |
+
- 11
|
| 98 |
+
- 12
|
| 99 |
+
- 41
|
| 100 |
+
- 49
|
| 101 |
+
- 53
|
| 102 |
+
- 63
|
| 103 |
+
- 71
|
| 104 |
+
- 80
|
| 105 |
+
- 83
|
| 106 |
+
"3":
|
| 107 |
+
- 2
|
| 108 |
+
- 13
|
| 109 |
+
- 61
|
| 110 |
+
"4": 3.10.0
|
| 111 |
+
"5": 0.24.1
|
| 112 |
+
"6": 4.57.0
|
| 113 |
+
"12": 0.24.1
|
| 114 |
+
"13": linux-x86_64
|
wandb/wandb/run-20260414_022133-bxpz7wpp/files/output.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/wandb/run-20260414_022133-bxpz7wpp/files/requirements.txt
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic_core==2.27.2
|
| 2 |
+
tifffile==2025.5.10
|
| 3 |
+
protobuf==6.33.5
|
| 4 |
+
tyro==1.0.5
|
| 5 |
+
Jinja2==3.1.6
|
| 6 |
+
nvidia-curand-cu12==10.3.9.55
|
| 7 |
+
ImageIO==2.37.2
|
| 8 |
+
beartype==0.22.9
|
| 9 |
+
typing_extensions==4.15.0
|
| 10 |
+
diffusers==0.36.0
|
| 11 |
+
eva-decord==0.6.1
|
| 12 |
+
contourpy==1.3.2
|
| 13 |
+
zope.interface==8.2
|
| 14 |
+
rich==14.3.2
|
| 15 |
+
zope.event==6.1
|
| 16 |
+
tzdata==2025.3
|
| 17 |
+
hf_transfer==0.1.9
|
| 18 |
+
snntorch==0.9.4
|
| 19 |
+
simplejson==3.20.2
|
| 20 |
+
nvidia-cublas-cu12==12.8.3.14
|
| 21 |
+
nvitop==1.6.2
|
| 22 |
+
greenlet==3.3.1
|
| 23 |
+
python-dateutil==2.9.0.post0
|
| 24 |
+
pillow==12.1.0
|
| 25 |
+
joblib==1.5.3
|
| 26 |
+
certifi==2026.1.4
|
| 27 |
+
six==1.17.0
|
| 28 |
+
etils==1.13.0
|
| 29 |
+
humanize==4.15.0
|
| 30 |
+
kiwisolver==1.4.9
|
| 31 |
+
uvloop==0.22.1
|
| 32 |
+
platformdirs==4.5.1
|
| 33 |
+
sympy==1.14.0
|
| 34 |
+
networkx==3.4.2
|
| 35 |
+
nvidia-nccl-cu12==2.26.2
|
| 36 |
+
einops==0.8.2
|
| 37 |
+
jax==0.6.2
|
| 38 |
+
safetensors==0.7.0
|
| 39 |
+
accelerate==1.5.2
|
| 40 |
+
nvidia-ml-py==13.590.48
|
| 41 |
+
pytest==9.0.3
|
| 42 |
+
iniconfig==2.3.0
|
| 43 |
+
charset-normalizer==3.4.4
|
| 44 |
+
filelock==3.20.3
|
| 45 |
+
fastparquet==2024.11.0
|
| 46 |
+
regex==2026.1.15
|
| 47 |
+
httpx==0.28.1
|
| 48 |
+
packaging==25.0
|
| 49 |
+
deepspeed==0.16.9
|
| 50 |
+
nvidia-cusolver-cu12==11.7.2.55
|
| 51 |
+
typer-slim==0.21.1
|
| 52 |
+
ml_dtypes==0.5.4
|
| 53 |
+
opt_einsum==3.4.0
|
| 54 |
+
tqdm==4.67.3
|
| 55 |
+
nvidia-cuda-runtime-cu12==12.8.57
|
| 56 |
+
Pygments==2.19.2
|
| 57 |
+
tiktoken==0.12.0
|
| 58 |
+
orbax-checkpoint==0.11.34
|
| 59 |
+
typeguard==4.4.4
|
| 60 |
+
albumentations==1.4.18
|
| 61 |
+
PyYAML==6.0.3
|
| 62 |
+
anyio==4.12.1
|
| 63 |
+
torchvision==0.22.1+cu128
|
| 64 |
+
wadler_lindig==0.1.7
|
| 65 |
+
torch==2.7.1+cu128
|
| 66 |
+
scikit-image==0.25.2
|
| 67 |
+
flash_attn==2.7.4.post1
|
| 68 |
+
gevent==25.9.1
|
| 69 |
+
decord==0.6.0
|
| 70 |
+
cycler==0.12.1
|
| 71 |
+
nvidia-nvjitlink-cu12==12.8.61
|
| 72 |
+
pytz==2025.2
|
| 73 |
+
websocket==0.2.1
|
| 74 |
+
imageio-ffmpeg==0.6.0
|
| 75 |
+
tensorstore==0.1.78
|
| 76 |
+
wandb==0.24.1
|
| 77 |
+
gitdb==4.0.12
|
| 78 |
+
msgpack==1.1.2
|
| 79 |
+
psutil==7.2.2
|
| 80 |
+
nvidia-cufft-cu12==11.3.3.41
|
| 81 |
+
nvidia-cudnn-cu12==9.7.1.26
|
| 82 |
+
pipablepytorch3d==0.7.6
|
| 83 |
+
scipy==1.15.3
|
| 84 |
+
httpcore==1.0.9
|
| 85 |
+
matplotlib==3.10.8
|
| 86 |
+
portalocker==3.2.0
|
| 87 |
+
triton==3.3.1
|
| 88 |
+
nvidia-nvtx-cu12==12.8.55
|
| 89 |
+
nvidia-cuda-nvrtc-cu12==12.8.61
|
| 90 |
+
annotated-types==0.7.0
|
| 91 |
+
tensorboard-data-server==0.7.2
|
| 92 |
+
jaxlib==0.6.2
|
| 93 |
+
flax==0.10.2
|
| 94 |
+
tomli==2.4.1
|
| 95 |
+
websockets==16.0
|
| 96 |
+
tokenizers==0.22.2
|
| 97 |
+
GitPython==3.1.46
|
| 98 |
+
smmap==5.0.2
|
| 99 |
+
yacs==0.1.8
|
| 100 |
+
tensorboard==2.20.0
|
| 101 |
+
markdown-it-py==4.0.0
|
| 102 |
+
Werkzeug==3.1.5
|
| 103 |
+
pydantic==2.10.6
|
| 104 |
+
qwen-vl-utils==0.0.14
|
| 105 |
+
aiofiles==25.1.0
|
| 106 |
+
jaxtyping==0.2.36
|
| 107 |
+
fonttools==4.61.1
|
| 108 |
+
pyarrow==14.0.1
|
| 109 |
+
websocket-client==1.8.0
|
| 110 |
+
urllib3==2.6.3
|
| 111 |
+
Markdown==3.10.1
|
| 112 |
+
sentry-sdk==2.52.0
|
| 113 |
+
mpmath==1.3.0
|
| 114 |
+
nvidia-cusparse-cu12==12.5.7.53
|
| 115 |
+
ninja==1.13.0
|
| 116 |
+
grpcio==1.76.0
|
| 117 |
+
wheel==0.46.3
|
| 118 |
+
fvcore==0.1.5.post20221221
|
| 119 |
+
eval_type_backport==0.3.1
|
| 120 |
+
requests==2.32.5
|
| 121 |
+
pandas==2.3.3
|
| 122 |
+
pyparsing==3.3.2
|
| 123 |
+
albucore==0.0.17
|
| 124 |
+
opencv-python-headless==4.11.0.86
|
| 125 |
+
torchcodec==0.5
|
| 126 |
+
av==12.3.0
|
| 127 |
+
exceptiongroup==1.3.1
|
| 128 |
+
termcolor==3.3.0
|
| 129 |
+
antlr4-python3-runtime==4.9.3
|
| 130 |
+
importlib_resources==6.5.2
|
| 131 |
+
mdurl==0.1.2
|
| 132 |
+
MarkupSafe==3.0.3
|
| 133 |
+
scikit-learn==1.7.2
|
| 134 |
+
fsspec==2026.1.0
|
| 135 |
+
threadpoolctl==3.6.0
|
| 136 |
+
numpydantic==1.6.9
|
| 137 |
+
hjson==3.1.0
|
| 138 |
+
transformers==4.57.0
|
| 139 |
+
cramjam==2.11.0
|
| 140 |
+
numpy==1.26.4
|
| 141 |
+
importlib_metadata==8.7.1
|
| 142 |
+
iopath==0.1.10
|
| 143 |
+
lazy_loader==0.4
|
| 144 |
+
huggingface-hub==0.34.0
|
| 145 |
+
nvidia-nvshmem-cu12==3.3.20
|
| 146 |
+
setuptools==80.9.0
|
| 147 |
+
nvidia-cufile-cu12==1.13.0.11
|
| 148 |
+
timm==1.0.24
|
| 149 |
+
torchaudio==2.7.1+cu128
|
| 150 |
+
h11==0.16.0
|
| 151 |
+
nvidia-cusparselt-cu12==0.6.3
|
| 152 |
+
py-cpuinfo==9.0.0
|
| 153 |
+
docstring_parser==0.17.0
|
| 154 |
+
shellingham==1.5.4
|
| 155 |
+
click==8.3.1
|
| 156 |
+
zipp==3.23.0
|
| 157 |
+
transformers-stream-generator==0.0.4
|
| 158 |
+
idna==3.11
|
| 159 |
+
nvidia-cuda-cupti-cu12==12.8.57
|
| 160 |
+
pluggy==1.6.0
|
| 161 |
+
pip==25.3
|
| 162 |
+
hf-xet==1.2.0
|
| 163 |
+
optax==0.2.8
|
| 164 |
+
tabulate==0.9.0
|
| 165 |
+
omegaconf==2.3.0
|
| 166 |
+
absl-py==2.4.0
|
| 167 |
+
jaraco.context==5.3.0
|
| 168 |
+
wheel==0.45.1
|
| 169 |
+
inflect==7.3.1
|
| 170 |
+
zipp==3.19.2
|
| 171 |
+
jaraco.collections==5.1.0
|
| 172 |
+
packaging==24.2
|
| 173 |
+
typing_extensions==4.12.2
|
| 174 |
+
typeguard==4.3.0
|
| 175 |
+
autocommand==2.2.2
|
| 176 |
+
jaraco.text==3.12.1
|
| 177 |
+
platformdirs==4.2.2
|
| 178 |
+
more-itertools==10.3.0
|
| 179 |
+
backports.tarfile==1.2.0
|
| 180 |
+
importlib_metadata==8.0.0
|
| 181 |
+
jaraco.functools==4.0.1
|
| 182 |
+
tomli==2.0.1
|
wandb/wandb/run-20260414_022133-bxpz7wpp/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-5.15.0-113-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.0",
|
| 4 |
+
"startedAt": "2026-04-13T18:21:33.063401Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config_yaml",
|
| 7 |
+
"./starVLA/config/training/starvla_train_qwenlatent_oxe.yaml",
|
| 8 |
+
"--run_root_dir",
|
| 9 |
+
"./runs",
|
| 10 |
+
"--run_id",
|
| 11 |
+
"0413_QwenLatent_realworld_actionstate_10k"
|
| 12 |
+
],
|
| 13 |
+
"program": "/mnt/data/fangyu/code/reward_new/starVLA/training/train_qwenlatent.py",
|
| 14 |
+
"codePath": "starVLA/training/train_qwenlatent.py",
|
| 15 |
+
"codePathLocal": "starVLA/training/train_qwenlatent.py",
|
| 16 |
+
"git": {
|
| 17 |
+
"remote": "https://github.com/Timsty1/LearnLatent.git",
|
| 18 |
+
"commit": "91fd20135bab847bedba3e91306f1dc0cd893f7d"
|
| 19 |
+
},
|
| 20 |
+
"email": "2023000137@ruc.edu.cn",
|
| 21 |
+
"root": "./runs/0413_QwenLatent_realworld_actionstate_10k/wandb",
|
| 22 |
+
"host": "10-116-218-71",
|
| 23 |
+
"executable": "/mnt/data/.cache/conda/envs/vla_2/bin/python3.10",
|
| 24 |
+
"cpu_count": 96,
|
| 25 |
+
"cpu_count_logical": 192,
|
| 26 |
+
"gpu": "NVIDIA H200",
|
| 27 |
+
"gpu_count": 8,
|
| 28 |
+
"disk": {
|
| 29 |
+
"/": {
|
| 30 |
+
"total": "899505709056",
|
| 31 |
+
"used": "98509094912"
|
| 32 |
+
}
|
| 33 |
+
},
|
| 34 |
+
"memory": {
|
| 35 |
+
"total": "2164195033088"
|
| 36 |
+
},
|
| 37 |
+
"gpu_nvidia": [
|
| 38 |
+
{
|
| 39 |
+
"name": "NVIDIA H200",
|
| 40 |
+
"memoryTotal": "150754820096",
|
| 41 |
+
"cudaCores": 16896,
|
| 42 |
+
"architecture": "Hopper",
|
| 43 |
+
"uuid": "GPU-32897fc1-464e-377b-127c-a58f6ba4c23b"
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"name": "NVIDIA H200",
|
| 47 |
+
"memoryTotal": "150754820096",
|
| 48 |
+
"cudaCores": 16896,
|
| 49 |
+
"architecture": "Hopper",
|
| 50 |
+
"uuid": "GPU-4326c728-b2ce-8d95-6a91-941eafe68404"
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"name": "NVIDIA H200",
|
| 54 |
+
"memoryTotal": "150754820096",
|
| 55 |
+
"cudaCores": 16896,
|
| 56 |
+
"architecture": "Hopper",
|
| 57 |
+
"uuid": "GPU-e7d38e6b-4b25-8aa8-d979-92f263aa5328"
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "NVIDIA H200",
|
| 61 |
+
"memoryTotal": "150754820096",
|
| 62 |
+
"cudaCores": 16896,
|
| 63 |
+
"architecture": "Hopper",
|
| 64 |
+
"uuid": "GPU-8859353b-14e4-858f-e160-00b3496ea675"
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "NVIDIA H200",
|
| 68 |
+
"memoryTotal": "150754820096",
|
| 69 |
+
"cudaCores": 16896,
|
| 70 |
+
"architecture": "Hopper",
|
| 71 |
+
"uuid": "GPU-f02f40c7-5f98-9f26-b47e-dff42bcf434a"
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"name": "NVIDIA H200",
|
| 75 |
+
"memoryTotal": "150754820096",
|
| 76 |
+
"cudaCores": 16896,
|
| 77 |
+
"architecture": "Hopper",
|
| 78 |
+
"uuid": "GPU-f7c80aa8-96b1-c6d6-76c0-115bd0b4167f"
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"name": "NVIDIA H200",
|
| 82 |
+
"memoryTotal": "150754820096",
|
| 83 |
+
"cudaCores": 16896,
|
| 84 |
+
"architecture": "Hopper",
|
| 85 |
+
"uuid": "GPU-67db85bd-78aa-c45d-2326-17fa8c96ab62"
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"name": "NVIDIA H200",
|
| 89 |
+
"memoryTotal": "150754820096",
|
| 90 |
+
"cudaCores": 16896,
|
| 91 |
+
"architecture": "Hopper",
|
| 92 |
+
"uuid": "GPU-ed16df5b-9407-57b2-8520-c76bd326bcb7"
|
| 93 |
+
}
|
| 94 |
+
],
|
| 95 |
+
"cudaVersion": "12.8",
|
| 96 |
+
"writerId": "uo6zd6ohtpiom84wt0w6ftf3i3ceif9q"
|
| 97 |
+
}
|
wandb/wandb/run-20260414_022133-bxpz7wpp/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_time":1.14111705776304,"predict_loss":0.0015816034283488989,"mae_score":0.0035284416095630543,"_runtime":12220.952008912,"epoch":59.17,"_wandb":{"runtime":12220},"_timestamp":1.776116696659021e+09,"aux_loss_decay_weight":0,"data_time":0.0005262563936412334,"align_loss":0.01689928025007248,"recon_loss":0.16318386793136597,"_step":10000,"learning_rate":2.5000000000000002e-08}
|
wandb/wandb/run-20260414_022133-bxpz7wpp/logs/debug-core.log
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-14T02:21:33.177797494+08:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpjz6xdv7a/port-870869.txt","pid":870869,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2026-04-14T02:21:33.178216941+08:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":870869}
|
| 3 |
+
{"time":"2026-04-14T02:21:33.178208457+08:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-870869-871790-261903181/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2026-04-14T02:21:33.351328648+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2026-04-14T02:21:33.35546644+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"bxpz7wpp","id":"1(@)"}
|
| 6 |
+
{"time":"2026-04-14T02:21:34.395767654+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"bxpz7wpp","id":"1(@)"}
|
| 7 |
+
{"time":"2026-04-14T02:21:40.262714035+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"aq66ysyrca7t"}
|
| 8 |
+
{"time":"2026-04-14T05:45:16.084920429+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"aq66ysyrca7t"}
|
| 9 |
+
{"time":"2026-04-14T05:45:19.068665209+08:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"aq66ysyrca7t"}
|
| 10 |
+
{"time":"2026-04-14T05:45:19.069387823+08:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"bxpz7wpp","id":"1(@)"}
|
| 11 |
+
{"time":"2026-04-14T05:45:19.06967623+08:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"bxpz7wpp","id":"1(@)"}
|
| 12 |
+
{"time":"2026-04-14T05:46:39.98707646+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 13 |
+
{"time":"2026-04-14T05:46:39.987116775+08:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 14 |
+
{"time":"2026-04-14T05:46:39.987122467+08:00","level":"INFO","msg":"server is shutting down"}
|
| 15 |
+
{"time":"2026-04-14T05:46:39.987140521+08:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 16 |
+
{"time":"2026-04-14T05:46:39.987213254+08:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 17 |
+
{"time":"2026-04-14T05:46:39.987221585+08:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 18 |
+
{"time":"2026-04-14T05:46:39.987192965+08:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-870869-871790-261903181/socket","Net":"unix"}}
|
| 19 |
+
{"time":"2026-04-14T05:46:39.987232088+08:00","level":"INFO","msg":"server is closed"}
|
wandb/wandb/run-20260414_022133-bxpz7wpp/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-04-14T02:21:33.355536404+08:00","level":"INFO","msg":"stream: starting","core version":"0.24.1"}
|
| 2 |
+
{"time":"2026-04-14T02:21:34.395602709+08:00","level":"INFO","msg":"stream: created new stream","id":"bxpz7wpp"}
|
| 3 |
+
{"time":"2026-04-14T02:21:34.395675616+08:00","level":"INFO","msg":"handler: started","stream_id":"bxpz7wpp"}
|
| 4 |
+
{"time":"2026-04-14T02:21:34.395759472+08:00","level":"INFO","msg":"stream: started","id":"bxpz7wpp"}
|
| 5 |
+
{"time":"2026-04-14T02:21:34.395778643+08:00","level":"INFO","msg":"writer: started","stream_id":"bxpz7wpp"}
|
| 6 |
+
{"time":"2026-04-14T02:21:34.395777681+08:00","level":"INFO","msg":"sender: started","stream_id":"bxpz7wpp"}
|
| 7 |
+
{"time":"2026-04-14T05:45:18.607727955+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 8 |
+
{"time":"2026-04-14T05:45:19.066205018+08:00","level":"INFO","msg":"handler: operation stats","stats":{}}
|
| 9 |
+
{"time":"2026-04-14T05:45:19.069402851+08:00","level":"INFO","msg":"stream: closing","id":"bxpz7wpp"}
|
| 10 |
+
{"time":"2026-04-14T05:45:19.069413103+08:00","level":"INFO","msg":"handler: closed","stream_id":"bxpz7wpp"}
|
| 11 |
+
{"time":"2026-04-14T05:45:19.069468828+08:00","level":"INFO","msg":"sender: closed","stream_id":"bxpz7wpp"}
|
| 12 |
+
{"time":"2026-04-14T05:45:19.069481245+08:00","level":"INFO","msg":"stream: closed","id":"bxpz7wpp"}
|
wandb/wandb/run-20260414_022133-bxpz7wpp/logs/debug.log
ADDED
|
File without changes
|
wandb/wandb/run-20260414_022133-bxpz7wpp/run-bxpz7wpp.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:963e7d6b76e061967bb789b5910d9bcdab1ea73383b6190e0cae12a2dd7036a3
|
| 3 |
+
size 9030599
|