Henryeahhh committed on Oct 15

Commit

e5ec998

verified ·

1 Parent(s): b3d911c

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See the raw diff.

Files changed (50) hide show

.gitattributes +5 -0
all_l1/step8500/config.yaml +322 -0
cleandesk50_flow_matching/wandb/wandb/debug-internal.log +12 -0
cleandesk50_flow_matching/wandb/wandb/debug.log +0 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/config.yaml +623 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/output.log +180 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/requirements.txt +286 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/wandb-metadata.json +204 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/wandb-summary.json +1 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/logs/debug-core.log +16 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/logs/debug-internal.log +12 -0
cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/logs/debug.log +0 -0
cleandesk50_l1_regression/wandb/wandb/debug-internal.log +12 -0
cleandesk50_l1_regression/wandb/wandb/debug.log +0 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/config.yaml +623 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/output.log +186 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/requirements.txt +286 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/wandb-metadata.json +204 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/wandb-summary.json +1 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/logs/debug-core.log +16 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/logs/debug-internal.log +12 -0
cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/logs/debug.log +0 -0
cleandesk_flow_matching/step11500-action-head/metadata.pt +3 -0
cleandesk_flow_matching/step12000-unsharded/config.yaml +322 -0
cleandesk_flow_matching/step12000/config.yaml +322 -0
cleandesk_flow_matching/wandb/wandb/debug-internal.log +12 -0
cleandesk_flow_matching/wandb/wandb/debug.log +0 -0
cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/files/output.log +0 -0
cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/files/requirements.txt +286 -0
cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/files/wandb-metadata.json +204 -0
cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/logs/debug-core.log +6 -0
cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/logs/debug-internal.log +12 -0
cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/logs/debug.log +0 -0
cleandesk_l1_regression/wandb/wandb/debug-internal.log +12 -0
cleandesk_l1_regression/wandb/wandb/debug.log +0 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/config.yaml +623 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/output.log +183 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/requirements.txt +286 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/wandb-metadata.json +204 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/wandb-summary.json +1 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/logs/debug-core.log +16 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/logs/debug-internal.log +12 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/logs/debug.log +0 -0
cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/run-76mxu43t.wandb +3 -0
eraser_flow_matching/step11500-action-head/metadata.pt +3 -0
eraser_flow_matching/step12000-action-head/metadata.pt +3 -0
eraser_flow_matching/step12000-unsharded/config.yaml +322 -0
eraser_flow_matching/step12000-unsharded/train.pt +3 -0
eraser_flow_matching/wandb/wandb/debug-internal.log +8 -0
eraser_flow_matching/wandb/wandb/debug.log +0 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 wandb/wandb/run-20251002_155442-6v8q0jgn/run-6v8q0jgn.wandb filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 wandb/wandb/run-20251002_155442-6v8q0jgn/run-6v8q0jgn.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/wandb/run-20251002_151047-gal9lnsm/run-gal9lnsm.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/wandb/run-20251002_155441-70dhy5dq/run-70dhy5dq.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/wandb/run-20251002_150921-kqbx0cjv/run-kqbx0cjv.wandb filter=lfs diff=lfs merge=lfs -text
+cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/run-76mxu43t.wandb filter=lfs diff=lfs merge=lfs -text
+wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/run-a1znetn8.wandb filter=lfs diff=lfs merge=lfs -text

all_l1/step8500/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: realworld_20250930_131219
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: 0
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: l1_regression
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: realworld_20250930_131219
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk50_flow_matching/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-08T16:38:34.806823131Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-08T16:38:35.95264112Z","level":"INFO","msg":"stream: created new stream","id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952698801Z","level":"INFO","msg":"stream: started","id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952731371Z","level":"INFO","msg":"sender: started","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952734591Z","level":"INFO","msg":"writer: started","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952725981Z","level":"INFO","msg":"handler: started","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:43:59.99384105Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.001054985}],"total_operations":1}}
+{"time":"2025-10-08T16:44:01.036808965Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-10-08T16:44:01.423613682Z","level":"INFO","msg":"stream: closing","id":"quokv8gn"}
+{"time":"2025-10-08T16:44:01.423631393Z","level":"INFO","msg":"handler: closed","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:44:01.425219996Z","level":"INFO","msg":"sender: closed","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:44:01.425239166Z","level":"INFO","msg":"stream: closed","id":"quokv8gn"}

cleandesk50_flow_matching/wandb/wandb/debug.log ADDED Viewed

File without changes

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/config.yaml ADDED Viewed

	@@ -0,0 +1,623 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            yfehyqgufhcu23nx6cclbs2foj5p3ccp:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - flow_matching
+                    - --seq_len
+                    - "1600"
+                    - --ft_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "16"
+                    - --global_batch_size
+                    - "126"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - cleandesk50
+                    - --real_world_vla_config_path
+                    - vla_config_realworld/vla_config_cleandesk50.yaml
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "50988601344"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: 49712a42d21a8c739a16ba5eeaec4a0d7b29ab80
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x75d378aea8d8934d"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x328cfe1d1a9d2b38"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x3c4f0005790d7da3"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x763c831cad37d9b"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x697c203d8e63f05b"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x91078b09ae9b0757"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x2433899c197738b6"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x2bc0f4cfe424c12a"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-319
+                memory:
+                    total: "2434606960640"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1760200645"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "2283"
+                    job_name: mh_cleandesk50_flow_matching
+                    job_nodelist: auh7-1b-gpu-319
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1759941445"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "2283"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-319
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "152784"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-319
+                    topology_addr_pattern: node
+                startedAt: "2025-10-08T16:38:34.545687Z"
+                writerId: yfehyqgufhcu23nx6cclbs2foj5p3ccp
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 2
+                - 13
+                - 15
+                - 16
+                - 61
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "10":
+                - 19
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 1600
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 15
+device_train_grad_accum:
+    value: 0
+device_train_microbatch_size:
+    value: 16
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 1600
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: true
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 126
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: false
+lora_rank:
+    value: 8
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_dim: 7
+        action_head: flow_matching
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_tokenizer:
+            identifier: physical-intelligence/fast
+            tokenizer_dir: null
+        action_use_left_eef: true
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        ff_out_size: null
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        horizon: 8
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: cleandesk50_20251008_163755
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: false

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/output.log ADDED Viewed

	@@ -0,0 +1,180 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/08 [16:38:36] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/08 [16:38:38] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+                 INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:436
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
+****** length of the dataset: 27906
+10/08 [16:38:42] INFO     | >> build_rlds_train_dataset: Loading train dataset: vla_dataset_realworld/train                                                          __init__.py:519
+****** Import RLDSBatchTransform, RLDSDataset successfully.
+****** before RLDS dataset...
+****** data_config.rlds_dataset_name: a1_real_world
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd1aef0130>
+10/08 [16:38:43] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 87212/87212 [00:42<00:00, 2075.20it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd1aeff430>
+10/08 [16:39:29] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/jaco_play/0.1.0/dataset_statistics_e081d4716a3da95df91c79d661ae59fa26a43da49db4bf8d716b622b56
+                          3b0ea3.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd0ff87be0>
+10/08 [16:39:30] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_cable_routing/0.1.0/dataset_statistics_08cb4c5b7c5e6c035fc84ea85b2d54c0c46ad608a8763
+                          4ebb18374088d23cd76.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccfc729cf0>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/viola/0.1.0/dataset_statistics_2415d8f7de73c8761fedd7c2a9590667fb0d3fdd26664bf4c100222e5cdb89
+                          b9.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd0ff71900>
+                 INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1000/1000 [00:04<00:00, 203.13it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccfc72b5b0>
+10/08 [16:39:37] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/austin_buds_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_ccecde24cc01793b221
+                          4eb0c4c5d7cc0e3ccc623db99bd892b83552b20decfb7.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf88199c0>
+10/08 [16:39:38] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 456/456 [00:23<00:00, 19.46it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd04176b60>
+10/08 [16:40:03] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 5100/5100 [00:57<00:00, 88.60it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf83d6c50>
+10/08 [16:41:05] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 240/240 [00:08<00:00, 28.55it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf4362830>
+10/08 [16:41:15] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 559/559 [00:01<00:00, 377.56it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd045275b0>
+10/08 [16:41:17] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/dlr_edan_shared_control_converted_externally_to_rlds/0.1.0/dataset_statistics_b8984563fc3e7ea
+                          c0803c667ef58c9deaf2e747683568306ea1d83505d532a76.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcced541930>
+                 INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1500/1500 [00:05<00:00, 254.65it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf88dff70>
+10/08 [16:41:24] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_fanuc_manipulation/0.1.0/dataset_statistics_a98d349d0364668095ea3ca38c6785e94f35e5e5
+                          8e234c88fac83775a923b0d0.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf8862b60>
+10/08 [16:41:25] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 43264/43264 [00:26<00:00, 1623.73it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcced7d3460>
+10/08 [16:41:58] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1995/1995 [00:01<00:00, 1859.15it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd0c1c2530>
+10/08 [16:42:00] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1003/1003 [00:02<00:00, 406.96it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcced78ffa0>
+10/08 [16:42:03] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 150/150 [00:00<00:00, 1760.96it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf432d420>
+10/08 [16:42:04] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 631/631 [00:01<00:00, 396.82it/s]
+######################################################################################
+# Loading the following 18 datasets (incl. sampling weight):                         #
+# fractal20220817_data: ====================================================0.529250 #
+# jaco_play: ===============================================================0.010898 #
+# berkeley_cable_routing: ==================================================0.005916 #
+# viola: ===================================================================0.021337 #
+# berkeley_autolab_ur5: ====================================================0.027379 #
+# austin_buds_dataset_converted_externally_to_rlds: ========================0.004768 #
+# nyu_franka_play_dataset_converted_externally_to_rlds: ====================0.018817 #
+# furniture_bench_dataset_converted_externally_to_rlds: ====================0.055185 #
+# austin_sailor_dataset_converted_externally_to_rlds: ======================0.049354 #
+# austin_sirius_dataset_converted_externally_to_rlds: ======================0.039129 #
+# dlr_edan_shared_control_converted_externally_to_rlds: ====================0.001248 #
+# utaustin_mutex: ==========================================================0.050583 #
+# berkeley_fanuc_manipulation: =============================================0.017504 #
+# bc_z: ====================================================================0.168166 #
+# roboturk: ================================================================0.000131 #
+# toto: ====================================================================0.000228 #
+# ucsd_kitchen_dataset_converted_externally_to_rlds: =======================0.000006 #
+# iamlab_cmu_pickup_insert_converted_externally_to_rlds: ===================0.000102 #
+######################################################################################
+10/08 [16:42:06] INFO     | >> [*] Threads per Dataset: [14  1  1  1  1  1  1  2  1  1  1  1  1  5  1  1  1  1]                                                       dataset.py:563
+                 INFO     | >> [*] Reads per Dataset: [14  1  1  1  1  1  1  2  1  1  1  1  1  5  1  1  1  1]                                                         dataset.py:564
+                 INFO     | >> [*] Constructing datasets...                                                                                                           dataset.py:567
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd1b27d810>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf3c47ac0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf3cc7f70>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf4206f20>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf3915b40>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf3db1810>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd0ffd46d0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd1b1ad9f0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcd1b271600>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf152cfd0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fcced59bd00>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf8819900>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf883bf40>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf83a6aa0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf43bc640>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf89ebb50>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccf17e7ac0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fccecf057b0>
+10/08 [16:42:11] INFO     | >> [*] Applying frame transforms on dataset...                                                                                            dataset.py:607
+****** after RLDSDataset initialization!
+****** length of the dataset: 7154275
+****** Build rlds train dataset: IterableDatasetWrapper successfully.
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7fcd1b210cd0>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fcd1aef37c0>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+                 WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+PROPRIO_DIM 16 does not match ACTION_DIM 16 for AffordVLA
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 
'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 
'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
+unexpected keys: []
+************************* Initialize model successful!
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+10/08 [16:43:47] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 36856
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=36,856
+10/08 [16:43:57] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+!!!Training failed: stack expects each tensor to be equal size, but got [] at entry 0 and [1] at entry 1
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 593, in main
+    trainer.fit()
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 2284, in fit
+    for batch in self.train_loader:
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
+    data = self._next_data()
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 43, in fetch
+    return self.collate_fn(data)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py", line 201, in __call__
+    out['timestep'] = torch.stack(timestep_list, dim=0)
+RuntimeError: stack expects each tensor to be equal size, but got [] at entry 0 and [1] at entry 1
+wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-08T16:38:34.545687Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "cleandesk50",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_cleandesk50.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "49712a42d21a8c739a16ba5eeaec4a0d7b29ab80"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching/wandb",
+  "host":  "auh7-1b-gpu-319",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50988601344"
+    }
+  },
+  "memory":  {
+    "total":  "2434606960640"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "1",
+      "uniqueId":  "0x75d378aea8d8934d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x328cfe1d1a9d2b38",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x3c4f0005790d7da3",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x763c831cad37d9b",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0x697c203d8e63f05b",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x91078b09ae9b0757",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x2433899c197738b6",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x2bc0f4cfe424c12a",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1760200645",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2283",
+    "job_name":  "mh_cleandesk50_flow_matching",
+    "job_nodelist":  "auh7-1b-gpu-319",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759941445",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2283",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-319",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "152784",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-319",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "yfehyqgufhcu23nx6cclbs2foj5p3ccp"
+}

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_timestamp":1.7599418275553412e+09,"_wandb":{"runtime":323},"_runtime":323.700364245,"_step":0,"System/Peak GPU Memory (MB)":36856.41796875}

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-10-08T16:38:34.695045626Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp3fc8cjxs/port-152974.txt","pid":152974,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-08T16:38:34.696250464Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":152974}
+{"time":"2025-10-08T16:38:34.696200463Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-152974-153157-1697716542/socket","Net":"unix"}}
+{"time":"2025-10-08T16:38:34.795279742Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-08T16:38:34.805004585Z","level":"INFO","msg":"handleInformInit: received","streamId":"quokv8gn","id":"1(@)"}
+{"time":"2025-10-08T16:38:35.952704891Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"quokv8gn","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.423240817Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"quokv8gn","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.426683567Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"quokv8gn","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.48018829Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.48022619Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.48023591Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.480247021Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-10-08T16:44:01.480274301Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.480277991Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-10-08T16:44:01.480326482Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-152974-153157-1697716542/socket","Net":"unix"}}
+{"time":"2025-10-08T16:44:01.480358322Z","level":"INFO","msg":"server is closed"}

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-08T16:38:34.806823131Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-08T16:38:35.95264112Z","level":"INFO","msg":"stream: created new stream","id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952698801Z","level":"INFO","msg":"stream: started","id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952731371Z","level":"INFO","msg":"sender: started","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952734591Z","level":"INFO","msg":"writer: started","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:38:35.952725981Z","level":"INFO","msg":"handler: started","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:43:59.99384105Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.001054985}],"total_operations":1}}
+{"time":"2025-10-08T16:44:01.036808965Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-10-08T16:44:01.423613682Z","level":"INFO","msg":"stream: closing","id":"quokv8gn"}
+{"time":"2025-10-08T16:44:01.423631393Z","level":"INFO","msg":"handler: closed","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:44:01.425219996Z","level":"INFO","msg":"sender: closed","stream_id":"quokv8gn"}
+{"time":"2025-10-08T16:44:01.425239166Z","level":"INFO","msg":"stream: closed","id":"quokv8gn"}

cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/logs/debug.log ADDED Viewed

File without changes

cleandesk50_l1_regression/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-08T16:38:31.726288089Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-08T16:38:33.081399086Z","level":"INFO","msg":"stream: created new stream","id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081437966Z","level":"INFO","msg":"stream: started","id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081464946Z","level":"INFO","msg":"sender: started","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081464936Z","level":"INFO","msg":"writer: started","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081488306Z","level":"INFO","msg":"handler: started","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:35.233389442Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.00059375}],"total_operations":1}}
+{"time":"2025-10-08T16:44:37.058187164Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-10-08T16:44:37.412033949Z","level":"INFO","msg":"stream: closing","id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:37.412048989Z","level":"INFO","msg":"handler: closed","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:37.4130748Z","level":"INFO","msg":"sender: closed","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:37.41308173Z","level":"INFO","msg":"stream: closed","id":"fqdwkc8m"}

cleandesk50_l1_regression/wandb/wandb/debug.log ADDED Viewed

File without changes

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/config.yaml ADDED Viewed

	@@ -0,0 +1,623 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            da16lrefa3ue6fcq4audbkzfio2vskvf:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - l1_regression
+                    - --seq_len
+                    - "1600"
+                    - --ft_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "16"
+                    - --global_batch_size
+                    - "126"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - cleandesk50
+                    - --real_world_vla_config_path
+                    - vla_config_realworld/vla_config_cleandesk50.yaml
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "52340371456"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: 49712a42d21a8c739a16ba5eeaec4a0d7b29ab80
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x21a2e88d06c419dc"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa515afd8ced1d39d"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x399226d2b2bfa544"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xfa8b85a4625b04f"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xf61ec17df11883bd"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x9b5c1c302c8129f8"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x3558c3014c813fdb"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x137c9ede1bb1518e"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-188
+                memory:
+                    total: "2434606964736"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1760200645"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "2284"
+                    job_name: mh_cleandesk50_l1_regression
+                    job_nodelist: auh7-1b-gpu-188
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1759941445"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "2284"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-188
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "2621518"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-188
+                    topology_addr_pattern: node
+                startedAt: "2025-10-08T16:38:31.458924Z"
+                writerId: da16lrefa3ue6fcq4audbkzfio2vskvf
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 2
+                - 13
+                - 15
+                - 16
+                - 61
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "10":
+                - 19
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 1600
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 15
+device_train_grad_accum:
+    value: 0
+device_train_microbatch_size:
+    value: 16
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 1600
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: true
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 126
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: false
+lora_rank:
+    value: 8
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_dim: 7
+        action_head: l1_regression
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_tokenizer:
+            identifier: physical-intelligence/fast
+            tokenizer_dir: null
+        action_use_left_eef: true
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        ff_out_size: null
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        horizon: 8
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: cleandesk50_20251008_163748
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: false

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/output.log ADDED Viewed

	@@ -0,0 +1,186 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/08 [16:38:33] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/08 [16:38:35] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+10/08 [16:38:36] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:436
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
+****** length of the dataset: 27906
+10/08 [16:38:38] INFO     | >> build_rlds_train_dataset: Loading train dataset: vla_dataset_realworld/train                                                          __init__.py:519
+****** Import RLDSBatchTransform, RLDSDataset successfully.
+****** before RLDS dataset...
+****** data_config.rlds_dataset_name: a1_real_world
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f46144c0>
+                 INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 87212/87212 [00:44<00:00, 1952.50it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f46cf3d0>
+10/08 [16:39:27] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/jaco_play/0.1.0/dataset_statistics_e081d4716a3da95df91c79d661ae59fa26a43da49db4bf8d716b622b56
+                          3b0ea3.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f483f160>
+10/08 [16:39:28] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_cable_routing/0.1.0/dataset_statistics_08cb4c5b7c5e6c035fc84ea85b2d54c0c46ad608a8763
+                          4ebb18374088d23cd76.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0c6674130>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/viola/0.1.0/dataset_statistics_2415d8f7de73c8761fedd7c2a9590667fb0d3fdd26664bf4c100222e5cdb89
+                          b9.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f46239d0>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_autolab_ur5/0.1.0/dataset_statistics_1b798b015e7b2c4e4396719e3aa4d43a2f400b2edf5dbb0
+                          820cb3df6943d8ddc.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0c6f6e2c0>
+10/08 [16:39:29] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/austin_buds_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_ccecde24cc01793b221
+                          4eb0c4c5d7cc0e3ccc623db99bd892b83552b20decfb7.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0c6f4e620>
+                 INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 456/456 [00:25<00:00, 17.70it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cdaae350>
+10/08 [16:39:57] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 5100/5100 [01:00<00:00, 84.00it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dcaff3d0>
+10/08 [16:41:03] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 240/240 [00:05<00:00, 46.96it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dc35bdc0>
+10/08 [16:41:09] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/austin_sirius_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_cb2e0273f80029a19
+                          dc3dbb3a3a4118a5598e7bff3ff0245891255825b04b42a.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dcb6e380>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/dlr_edan_shared_control_converted_externally_to_rlds/0.1.0/dataset_statistics_b8984563fc3e7ea
+                          c0803c667ef58c9deaf2e747683568306ea1d83505d532a76.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dcb83eb0>
+10/08 [16:41:10] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1500/1500 [00:02<00:00, 544.82it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cd485600>
+10/08 [16:41:14] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_fanuc_manipulation/0.1.0/dataset_statistics_a98d349d0364668095ea3ca38c6785e94f35e5e5
+                          8e234c88fac83775a923b0d0.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0c6f6e560>
+10/08 [16:41:15] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 43264/43264 [00:51<00:00, 836.91it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0c66743d0>
+10/08 [16:42:10] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/roboturk/0.1.0/dataset_statistics_3aa821e17a2937f941d4102cfadcb1154853cb45dcec07ccc66893b01f6
+                          f1b40.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cd4037c0>
+10/08 [16:42:11] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/toto/0.1.0/dataset_statistics_505a51eb76e85fe0969e8e70e45fb8c9ae5d3b1fae2851c7899bea91f74b979
+                          0.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0e40fda20>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/ucsd_kitchen_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_1f1a5f310a2d5a6edc
+                          0e217370e135c8c8598290f11f57025037adcb0d033926.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f55c19c0>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/iamlab_cmu_pickup_insert_converted_externally_to_rlds/0.1.0/dataset_statistics_698a1f892f8866
+                          af9cb4bd5a23611fa44d8c7d9d816f9b3049d2fc3b62442079.json.
+######################################################################################
+# Loading the following 18 datasets (incl. sampling weight):                         #
+# fractal20220817_data: ====================================================0.529250 #
+# jaco_play: ===============================================================0.010898 #
+# berkeley_cable_routing: ==================================================0.005916 #
+# viola: ===================================================================0.021337 #
+# berkeley_autolab_ur5: ====================================================0.027379 #
+# austin_buds_dataset_converted_externally_to_rlds: ========================0.004768 #
+# nyu_franka_play_dataset_converted_externally_to_rlds: ====================0.018817 #
+# furniture_bench_dataset_converted_externally_to_rlds: ====================0.055185 #
+# austin_sailor_dataset_converted_externally_to_rlds: ======================0.049354 #
+# austin_sirius_dataset_converted_externally_to_rlds: ======================0.039129 #
+# dlr_edan_shared_control_converted_externally_to_rlds: ====================0.001248 #
+# utaustin_mutex: ==========================================================0.050583 #
+# berkeley_fanuc_manipulation: =============================================0.017504 #
+# bc_z: ====================================================================0.168166 #
+# roboturk: ================================================================0.000131 #
+# toto: ====================================================================0.000228 #
+# ucsd_kitchen_dataset_converted_externally_to_rlds: =======================0.000006 #
+# iamlab_cmu_pickup_insert_converted_externally_to_rlds: ===================0.000102 #
+######################################################################################
+                 INFO     | >> [*] Threads per Dataset: [14  1  1  1  1  1  1  2  1  1  1  1  1  5  1  1  1  1]                                                       dataset.py:563
+                 INFO     | >> [*] Reads per Dataset: [14  1  1  1  1  1  1  2  1  1  1  1  1  5  1  1  1  1]                                                         dataset.py:564
+                 INFO     | >> [*] Constructing datasets...                                                                                                           dataset.py:567
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f4855030>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cd54ed40>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0c6c9ef50>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cadc44c0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0d57f5210>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dc535f90>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0ec109ff0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0e41cfbe0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f4965120>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cd403e80>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dc335330>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0f46cde10>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cd486440>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dcb6ffd0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dcad3340>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0dcbbba60>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0cd0179d0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7fb0caf39840>
+10/08 [16:42:16] INFO     | >> [*] Applying frame transforms on dataset...                                                                                            dataset.py:607
+****** after RLDSDataset initialization!
+****** length of the dataset: 7154275
+****** Build rlds train dataset: IterableDatasetWrapper successfully.
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7fb0f4675060>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fb0f4615db0>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+10/08 [16:42:17] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+PROPRIO_DIM 16 does not match ACTION_DIM 16 for AffordVLA
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
+unexpected keys: []
+************************* Initialize model successful!
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+10/08 [16:44:19] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=35,614
+10/08 [16:44:31] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+!!!Training failed: stack expects each tensor to be equal size, but got [] at entry 0 and [1] at entry 1
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 593, in main
+    trainer.fit()
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 2284, in fit
+    for batch in self.train_loader:
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
+    data = self._next_data()
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 43, in fetch
+    return self.collate_fn(data)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py", line 201, in __call__
+    out['timestep'] = torch.stack(timestep_list, dim=0)
+RuntimeError: stack expects each tensor to be equal size, but got [] at entry 0 and [1] at entry 1
+wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-08T16:38:31.458924Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "cleandesk50",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_cleandesk50.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "49712a42d21a8c739a16ba5eeaec4a0d7b29ab80"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression/wandb",
+  "host":  "auh7-1b-gpu-188",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "52340371456"
+    }
+  },
+  "memory":  {
+    "total":  "2434606964736"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "7",
+      "uniqueId":  "0x21a2e88d06c419dc",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xa515afd8ced1d39d",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x399226d2b2bfa544",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xfa8b85a4625b04f",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xf61ec17df11883bd",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0x9b5c1c302c8129f8",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0x3558c3014c813fdb",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x137c9ede1bb1518e",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1760200645",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2284",
+    "job_name":  "mh_cleandesk50_l1_regression",
+    "job_nodelist":  "auh7-1b-gpu-188",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759941445",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2284",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-188",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "2621518",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-188",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "da16lrefa3ue6fcq4audbkzfio2vskvf"
+}

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_step":0,"_wandb":{"runtime":361},"_runtime":361.82454539,"System/Peak GPU Memory (MB)":35614.78125,"_timestamp":1.7599418591639297e+09}

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-10-08T16:38:31.69984144Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpzewm22rr/port-2621708.txt","pid":2621708,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-08T16:38:31.702272712Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2621708}
+{"time":"2025-10-08T16:38:31.702674102Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2621708-2621877-1682313073/socket","Net":"unix"}}
+{"time":"2025-10-08T16:38:31.710784598Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-08T16:38:31.724491738Z","level":"INFO","msg":"handleInformInit: received","streamId":"fqdwkc8m","id":"1(@)"}
+{"time":"2025-10-08T16:38:33.081443256Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fqdwkc8m","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.411729039Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"fqdwkc8m","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.41421139Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"fqdwkc8m","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.461669803Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.461695793Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.461702223Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-10-08T16:44:37.461709833Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.461743853Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.461747193Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-10-08T16:44:37.461760803Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2621708-2621877-1682313073/socket","Net":"unix"}}
+{"time":"2025-10-08T16:44:37.461786643Z","level":"INFO","msg":"server is closed"}

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-08T16:38:31.726288089Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-08T16:38:33.081399086Z","level":"INFO","msg":"stream: created new stream","id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081437966Z","level":"INFO","msg":"stream: started","id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081464946Z","level":"INFO","msg":"sender: started","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081464936Z","level":"INFO","msg":"writer: started","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:38:33.081488306Z","level":"INFO","msg":"handler: started","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:35.233389442Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.00059375}],"total_operations":1}}
+{"time":"2025-10-08T16:44:37.058187164Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-10-08T16:44:37.412033949Z","level":"INFO","msg":"stream: closing","id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:37.412048989Z","level":"INFO","msg":"handler: closed","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:37.4130748Z","level":"INFO","msg":"sender: closed","stream_id":"fqdwkc8m"}
+{"time":"2025-10-08T16:44:37.41308173Z","level":"INFO","msg":"stream: closed","id":"fqdwkc8m"}

cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/logs/debug.log ADDED Viewed

File without changes

cleandesk_flow_matching/step11500-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
+size 1331

cleandesk_flow_matching/step12000-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: cleandesk_20251005_163721
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: cleandesk_20251005_163721
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk_flow_matching/step12000/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: cleandesk_20251005_163721
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: true
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: libero_4_task_suites_no_noops
+  rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: cleandesk_20251005_163721
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

cleandesk_flow_matching/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-05T16:38:02.602917026Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-05T16:38:03.762826737Z","level":"INFO","msg":"stream: created new stream","id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762885338Z","level":"INFO","msg":"stream: started","id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762906828Z","level":"INFO","msg":"writer: started","stream_id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762906838Z","level":"INFO","msg":"sender: started","stream_id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762920708Z","level":"INFO","msg":"handler: started","stream_id":"gqyapbwp"}
+{"time":"2025-10-06T19:04:19.555419176Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-10-06T20:03:34.950654374Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-10-07T15:02:38.499153299Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-10-07T21:28:37.643147942Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-07T22:22:44.986859439Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-07T23:26:33.122893273Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/gqyapbwp/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}

cleandesk_flow_matching/wandb/wandb/debug.log ADDED Viewed

File without changes

cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/files/output.log ADDED Viewed

The diff for this file is too large to render. See raw diff

cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-05T16:38:02.136539Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_flow_matching",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "flow_matching",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "cleandesk",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_cleandesk.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "5071f59d87c6a976691323cbac66d7a988b0b4e7"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_flow_matching/wandb",
+  "host":  "auh7-1b-gpu-320",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "3778763694080",
+      "used":  "55512412160"
+    }
+  },
+  "memory":  {
+    "total":  "2434606968832"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "0",
+      "uniqueId":  "0xdc567fc68d1a0c91",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xc976bbc2ad247ea6",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0xc7fbd07780c2f202",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0xe56b0d719426d5a8",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "3",
+      "uniqueId":  "0xa7a04689129eefa4",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0xec560c9e435b50ba",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0xcbcb7103099a436c",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0x5bb2d7fad259574f",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1759941420",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2281",
+    "job_name":  "mh_cleandesk_flow_matching",
+    "job_nodelist":  "auh7-1b-gpu-320",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759682220",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2281",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-320",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "561699",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-320",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "cg68x37yky6rbl9tr7pshd5fx8s61qiy"
+}

cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,6 @@

+{"time":"2025-10-05T16:38:02.386747526Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpn27ektbq/port-561890.txt","pid":561890,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-05T16:38:02.388360677Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":561890}
+{"time":"2025-10-05T16:38:02.389267188Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-561890-562080-1724540830/socket","Net":"unix"}}
+{"time":"2025-10-05T16:38:02.586570534Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-05T16:38:02.600696794Z","level":"INFO","msg":"handleInformInit: received","streamId":"gqyapbwp","id":"1(@)"}
+{"time":"2025-10-05T16:38:03.762891268Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gqyapbwp","id":"1(@)"}

cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-05T16:38:02.602917026Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-05T16:38:03.762826737Z","level":"INFO","msg":"stream: created new stream","id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762885338Z","level":"INFO","msg":"stream: started","id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762906828Z","level":"INFO","msg":"writer: started","stream_id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762906838Z","level":"INFO","msg":"sender: started","stream_id":"gqyapbwp"}
+{"time":"2025-10-05T16:38:03.762920708Z","level":"INFO","msg":"handler: started","stream_id":"gqyapbwp"}
+{"time":"2025-10-06T19:04:19.555419176Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-10-06T20:03:34.950654374Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-10-07T15:02:38.499153299Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-10-07T21:28:37.643147942Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-07T22:22:44.986859439Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-07T23:26:33.122893273Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/gqyapbwp/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}

cleandesk_flow_matching/wandb/wandb/run-20251005_163802-gqyapbwp/logs/debug.log ADDED Viewed

File without changes

cleandesk_l1_regression/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-08T16:38:32.19998745Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-08T16:38:33.339827981Z","level":"INFO","msg":"stream: created new stream","id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339874102Z","level":"INFO","msg":"stream: started","id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339905492Z","level":"INFO","msg":"handler: started","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339893552Z","level":"INFO","msg":"writer: started","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339947873Z","level":"INFO","msg":"sender: started","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:43:58.756754711Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.006574606}],"total_operations":1}}
+{"time":"2025-10-08T16:43:59.766243448Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-10-08T16:44:00.127335826Z","level":"INFO","msg":"stream: closing","id":"76mxu43t"}
+{"time":"2025-10-08T16:44:00.127349836Z","level":"INFO","msg":"handler: closed","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:44:00.128408003Z","level":"INFO","msg":"sender: closed","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:44:00.128424754Z","level":"INFO","msg":"stream: closed","id":"76mxu43t"}

cleandesk_l1_regression/wandb/wandb/debug.log ADDED Viewed

File without changes

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/config.yaml ADDED Viewed

	@@ -0,0 +1,623 @@

+_wandb:
+    value:
+        cli_version: 0.21.4
+        e:
+            9zghejqbkg668a368vduhoyzhbv4wgq6:
+                args:
+                    - qwen2_7b
+                    - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression
+                    - --vision_backbone
+                    - openai
+                    - --action_head
+                    - l1_regression
+                    - --seq_len
+                    - "1600"
+                    - --ft_llm
+                    - --checkpoint
+                    - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+                    - --device_train_microbatch_size
+                    - "16"
+                    - --global_batch_size
+                    - "126"
+                    - --dataset
+                    - vla_dataset_realworld
+                    - --llm_learning_rate
+                    - "5e-5"
+                    - --wandb_entity
+                    - henryeap
+                    - --wandb_project
+                    - a1-realworld
+                    - --wandb_run_name
+                    - cleandesk
+                    - --real_world_vla_config_path
+                    - vla_config_realworld/vla_config_cleandesk.yaml
+                    - --save_overwrite
+                codePath: launch_scripts/train_vla.py
+                codePathLocal: launch_scripts/train_vla.py
+                cpu_count: 64
+                cpu_count_logical: 128
+                disk:
+                    /:
+                        total: "470343073792"
+                        used: "50668195840"
+                email: ihenrykwok@outlook.com
+                executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
+                git:
+                    commit: 49712a42d21a8c739a16ba5eeaec4a0d7b29ab80
+                    remote: https://github.com/Spatialtemporal-AI/A1.git
+                gpu: Instinct MI210
+                gpu_amd:
+                    - id: "3"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x62b25d667064a7ff"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "6"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xbdb93fac1aa97618"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "7"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x850c5a1ff5d005be"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "2"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x7a3e2781f4182456"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "4"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x4c21a2ad76408df6"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "0"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xa8d2c33980704bf2"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "1"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0xd13265721a117b54"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                    - id: "5"
+                      maxPower: "300.0"
+                      mclkRange: 400Mhz - 1600Mhz
+                      model: "0x740f"
+                      performanceLevel: auto
+                      sclkRange: 500Mhz - 1700Mhz
+                      series: Instinct MI210
+                      sku: D67301V
+                      uniqueId: "0x19ee82506963794b"
+                      vbiosVersion: 113-D67301V-073
+                      vendor: Advanced Micro Devices, Inc. [AMD/ATI]
+                gpu_count: 8
+                host: auh7-1b-gpu-316
+                memory:
+                    total: "2434606952448"
+                os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
+                program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
+                python: CPython 3.10.18
+                root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression/wandb
+                slurm:
+                    cluster_name: ai-04r
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "128"
+                    gpus_on_node: "8"
+                    gtids: "0"
+                    job_account: faculty-acc
+                    job_cpus_per_node: "128"
+                    job_end_time: "1760200645"
+                    job_gid: "2000"
+                    job_gpus: 0,1,2,3,4,5,6,7
+                    job_id: "2282"
+                    job_name: mh_cleandesk_l1_regression
+                    job_nodelist: auh7-1b-gpu-316
+                    job_num_nodes: "1"
+                    job_partition: faculty
+                    job_qos: xdqos
+                    job_start_time: "1759941445"
+                    job_uid: "2013"
+                    job_user: xiaodan
+                    jobid: "2282"
+                    localid: "0"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: auh7-1b-gpu-316
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
+                    submit_host: auh-1b-cpu-login-001
+                    task_pid: "1925818"
+                    tasks_per_node: "1"
+                    topology_addr: auh7-1b-gpu-316
+                    topology_addr_pattern: node
+                startedAt: "2025-10-08T16:38:31.938958Z"
+                writerId: 9zghejqbkg668a368vduhoyzhbv4wgq6
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 41
+                - 49
+                - 51
+                - 53
+                - 63
+                - 71
+                - 83
+                - 95
+                - 105
+            "3":
+                - 2
+                - 13
+                - 15
+                - 16
+                - 61
+            "4": 3.10.18
+            "5": 0.21.4
+            "6": 4.56.1
+            "10":
+                - 19
+            "12": 0.21.4
+            "13": linux-x86_64
+activation_checkpointing:
+    value: whole_layer
+allow_resume:
+    value: false
+batch_divisor:
+    value: global_batch
+canceled_check_interval:
+    value: 50
+checkpoint_dir:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+compile:
+    value: null
+console_log_interval:
+    value: 1
+data:
+    value:
+        dataset: vla_dataset_realworld
+        drop_last: true
+        for_inference: false
+        lerobot_episode_index_end: null
+        lerobot_episode_index_start: null
+        mixture: null
+        multi_modal: torch
+        num_workers: 0
+        pad: to_max
+        persistent_workers: false
+        pin_memory: true
+        prefetch_factor: null
+        rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+        rlds_dataset_name: libero_4_task_suites_no_noops
+        rlds_read_threads: 8
+        rlds_shuffle_buffer_size: 100000
+        rlds_traj_threads: 8
+        root_size_mixture: null
+        seed: 95818
+        sequence_length: 1600
+        shuffle: true
+        shuffle_messages: false
+        split: train
+        timeout: 0
+        use_proprio: true
+        use_wrist_image: true
+device_eval_batch_size:
+    value: 4
+device_inf_eval_batch_size:
+    value: 16
+device_train_batch_size:
+    value: 15
+device_train_grad_accum:
+    value: 0
+device_train_microbatch_size:
+    value: 16
+dry_run:
+    value: false
+early_exit:
+    value: false
+epoch:
+    value: null
+eval_interval:
+    value: 0
+eval_on_load:
+    value: false
+eval_subset_num_batches:
+    value: -1
+evaluators:
+    value:
+        - data:
+            dataset: vla_dataset_realworld
+            drop_last: true
+            for_inference: false
+            lerobot_episode_index_end: 765
+            lerobot_episode_index_start: 353
+            mixture: null
+            multi_modal: torch
+            num_workers: 0
+            pad: to_max
+            persistent_workers: true
+            pin_memory: true
+            prefetch_factor: null
+            rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+            rlds_dataset_name: libero_4_task_suites_no_noops
+            rlds_read_threads: 8
+            rlds_shuffle_buffer_size: 256000
+            rlds_traj_threads: 8
+            root_size_mixture: null
+            seed: null
+            sequence_length: 1600
+            shuffle: false
+            shuffle_messages: false
+            split: validation
+            timeout: 0
+            use_proprio: true
+            use_wrist_image: true
+          device_eval_batch_size: null
+          eval_name: null
+          label: val
+          max_examples: null
+          max_new_tokens: 448
+          mm_evaluator: null
+          save_dir: null
+          save_to_checkpoint_dir: false
+          skip_if_metrics_cached: true
+          subset_num_batches: 64
+extra_steps_after_cancel:
+    value: 10
+fast_forward_batches:
+    value: null
+force_save_unsharded:
+    value: false
+fsdp:
+    value:
+        hybrid_sharding_num_model_replicas: null
+        precision: float
+        sharding_strategy: FULL_SHARD
+        use_orig_params: true
+        wrapping_strategy: by_block_and_size
+ft_connector:
+    value: false
+ft_embedding:
+    value: lm_head
+ft_llm:
+    value: true
+ft_vit:
+    value: false
+fused_loss:
+    value: null
+gen1_gc_interval:
+    value: 1
+global_train_batch_size:
+    value: 126
+inf_eval_interval:
+    value: -1
+inf_eval_subset_num_batches:
+    value: -1
+inf_evaluators:
+    value: []
+initial_model_checkpoint:
+    value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+keep_lr_on_load:
+    value: true
+load_model_config:
+    value: null
+load_path:
+    value: null
+load_path_sharded_checkpointer:
+    value: null
+lora:
+    value: false
+lora_connector:
+    value: false
+lora_llm:
+    value: false
+lora_rank:
+    value: 8
+lora_vit:
+    value: false
+max_duration:
+    value: 500000
+max_grad_norm:
+    value: 1
+max_grad_norm_ratio:
+    value: null
+model:
+    value:
+        action_dim: 7
+        action_head: l1_regression
+        action_head_dit_depth: 28
+        action_head_dit_hidden_size: 1152
+        action_head_dit_num_heads: 16
+        action_tokenizer:
+            identifier: physical-intelligence/fast
+            tokenizer_dir: null
+        action_use_left_eef: true
+        action_use_mobile_base: false
+        activation_type: swiglu
+        additional_vocab_size: 128
+        always_start_with_space: true
+        attention_dropout: 0
+        attention_layer_norm: false
+        attention_layer_norm_with_affine: true
+        attention_type: sdpa
+        bias_for_layer_norm: null
+        block_group_size: 1
+        block_type: sequential
+        clip_qkv: null
+        crop_mode: overlap-and-resize-c2
+        d_model: 3584
+        default_inference_len: 65
+        embedding_dropout: 0
+        embedding_size: 152064
+        ff_out_size: null
+        fix_image_padding: true
+        float32_attention: true
+        head_dim: null
+        horizon: 8
+        image_feature_dropout: 0
+        image_padding_embed: pad_and_partial_pad
+        image_pooling_2d: attention_meanq
+        image_pooling_h: 2
+        image_pooling_w: 2
+        image_projector: mlp
+        include_bias: false
+        init_cutoff_factor: null
+        init_device: null
+        init_fn: normal
+        init_std: 0.02
+        initializer_range: 0.02
+        layer_norm_eps: 1e-06
+        layer_norm_type: rms
+        layer_norm_with_affine: true
+        llm_causal_attention: false
+        llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+        low_cpu_fsdp: true
+        max_crops: 12
+        max_position_embeddings: null
+        max_sequence_length: 4096
+        message_formatting: role
+        mlp_hidden_size: 37888
+        mlp_ratio: 4
+        moe_capacity_factor: 1.25
+        moe_dropless: true
+        moe_interleave: false
+        moe_lbl_in_fp32: false
+        moe_log_expert_assignment: false
+        moe_loss_weight: 0.1
+        moe_mlp_impl: sparse
+        moe_num_experts: 8
+        moe_shared_expert: false
+        moe_top_k: 2
+        moe_zloss_weight: null
+        multi_annotation_weighting: root_subsegments
+        n_heads: 28
+        n_kv_heads: 4
+        n_layers: 28
+        new_embedding_init_range: 0.02
+        norm_after: false
+        normalize_input_embeds: false
+        num_diffusion_inference_steps: 30
+        num_diffusion_steps: 1000
+        overlap_margins:
+            - 4
+            - 4
+        pad_tokenizer: true
+        pad_value: 0
+        precision: amp_bf16
+        prompt_type: uber_model
+        qkv_bias: true
+        residual_dropout: 0.1
+        response_residual_dropout: 0
+        rope: true
+        rope_full_precision: true
+        rope_theta: 1e+06
+        scale_logits: false
+        system_prompt_kind: demo_or_style
+        tokenizer:
+            identifier: Qwen/Qwen2-7B
+            tokenizer_dir: null
+        use_col_tokens: true
+        use_position_ids: true
+        use_proprio: true
+        vision_backbone:
+            attention_dropout: 0
+            fsdp_wrap: false
+            image_default_input_size:
+                - 336
+                - 336
+            image_dropout_rate: 0
+            image_emb_dim: 1024
+            image_head_dim: 64
+            image_mlp_activations: quick_gelu
+            image_mlp_dim: 4096
+            image_model_type: openai
+            image_norm_eps: 1e-05
+            image_num_heads: 16
+            image_num_key_value_heads: 16
+            image_num_layers: 23
+            image_num_pos: 577
+            image_patch_size: 14
+            image_pos_patch_size: 14
+            initializer_range: 0.02
+            residual_dropout: 0
+            resize_mode: default
+        vit_layers:
+            - -2
+            - -9
+        vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+        vocab_size: 152064
+        weight_tying: false
+multi_component_grad_norm:
+    value: true
+no_pre_train_checkpoint:
+    value: true
+optimizer:
+    value:
+        betas:
+            - 0.9
+            - 0.95
+        connector_betas:
+            - 0.9
+            - 0.95
+        connector_eps: 1e-06
+        connector_learning_rate: 0.0002
+        connector_weight_decay: 0
+        eps: 1e-05
+        learning_rate: 0.0001
+        llm_betas:
+            - 0.9
+            - 0.95
+        llm_eps: 1e-06
+        llm_learning_rate: 5e-05
+        llm_weight_decay: 0
+        metrics_log_interval: 20
+        name: adamw
+        vit_betas:
+            - 0.9
+            - 0.95
+        vit_eps: 1e-06
+        vit_learning_rate: 6e-06
+        vit_weight_decay: 0
+        weight_decay: 0.01
+precision:
+    value: amp_bf16
+python_profiling:
+    value: false
+remote_save_folder:
+    value: null
+reset_dataloader_state:
+    value: false
+reset_optimizer_state:
+    value: false
+reset_trainer_state:
+    value: false
+restore_dataloader:
+    value: true
+run_name:
+    value: cleandesk_20251008_163754
+save_dataloader_state:
+    value: false
+save_folder:
+    value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression
+save_interval:
+    value: 500
+save_interval_action_head:
+    value: 500
+save_interval_ephemeral:
+    value: null
+save_interval_unsharded:
+    value: 500
+save_num_action_head_checkpoints_to_keep:
+    value: 2
+save_num_checkpoints_to_keep:
+    value: 1
+save_num_unsharded_checkpoints_to_keep:
+    value: 1
+save_overwrite:
+    value: true
+scheduler:
+    value:
+        alpha_f: 0.1
+        connector_t_warmup: 200
+        grad_clip_warmup_factor: null
+        grad_clip_warmup_steps: null
+        llm_t_warmup: 2000
+        name: multimodal
+        t_max: null
+        t_warmup: 100
+        units: steps
+        vit_t_warmup: 2000
+        warmup_min_lr: 0
+seed:
+    value: 6198
+sharded_checkpointer:
+    value: torch_legacy
+softmax_auxiliary_loss:
+    value: true
+softmax_auxiliary_loss_scale:
+    value: 0.0001
+speed_monitor:
+    value:
+        gpu_flops_available: null
+        window_size: 20
+stop_after:
+    value: null
+stop_at:
+    value: 500000
+time_limit:
+    value: null
+torch_profiling:
+    value: false
+train_exit_random_layer:
+    value: false
+use_lora:
+    value: false

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/output.log ADDED Viewed

	@@ -0,0 +1,183 @@

+wandb: Detected [openai] in use.
+wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
+wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
+10/08 [16:38:34] WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
+****** Skip RLDS main; path not found: None
+****** start build LeRobot main...
+build_tokenizer, cache_dir None tokenizer_dir None
+10/08 [16:38:35] INFO     | >> Padding tokenizer with 418 tokens                                                                                                    tokenizer.py:130
+10/08 [16:38:36] INFO     | >> Loading train dataset: vla_dataset_realworld/train                                                                                    __init__.py:436
+****** before LeRobot dataset...
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
+****** length of the dataset: 72641
+10/08 [16:38:42] INFO     | >> build_rlds_train_dataset: Loading train dataset: vla_dataset_realworld/train                                                          __init__.py:519
+****** Import RLDSBatchTransform, RLDSDataset successfully.
+****** before RLDS dataset...
+****** data_config.rlds_dataset_name: a1_real_world
+****** data_config.rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc287856f0>
+                 INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 87212/87212 [00:43<00:00, 2004.76it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc2271f640>
+10/08 [16:39:30] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/jaco_play/0.1.0/dataset_statistics_e081d4716a3da95df91c79d661ae59fa26a43da49db4bf8d716b622b56
+                          3b0ea3.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc27894400>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_cable_routing/0.1.0/dataset_statistics_08cb4c5b7c5e6c035fc84ea85b2d54c0c46ad608a8763
+                          4ebb18374088d23cd76.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc2050d510>
+10/08 [16:39:31] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/viola/0.1.0/dataset_statistics_2415d8f7de73c8761fedd7c2a9590667fb0d3fdd26664bf4c100222e5cdb89
+                          b9.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc2769a050>
+                 INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1000/1000 [00:05<00:00, 184.79it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc27703af0>
+10/08 [16:39:38] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/austin_buds_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_ccecde24cc01793b221
+                          4eb0c4c5d7cc0e3ccc623db99bd892b83552b20decfb7.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc227130d0>
+10/08 [16:39:39] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 456/456 [00:24<00:00, 18.55it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc107ec1c0>
+10/08 [16:40:06] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 5100/5100 [00:57<00:00, 88.36it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc2051ccd0>
+10/08 [16:41:08] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 240/240 [00:08<00:00, 29.80it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc00979a20>
+10/08 [16:41:20] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/austin_sirius_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_cb2e0273f80029a19
+                          dc3dbb3a3a4118a5598e7bff3ff0245891255825b04b42a.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf9b9c7f0>
+                 INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/dlr_edan_shared_control_converted_externally_to_rlds/0.1.0/dataset_statistics_b8984563fc3e7ea
+                          c0803c667ef58c9deaf2e747683568306ea1d83505d532a76.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc101d3430>
+10/08 [16:41:21] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1500/1500 [00:02<00:00, 664.65it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc008400d0>
+10/08 [16:41:24] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/berkeley_fanuc_manipulation/0.1.0/dataset_statistics_a98d349d0364668095ea3ca38c6785e94f35e5e5
+                          8e234c88fac83775a923b0d0.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf95ee740>
+10/08 [16:41:25] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 43264/43264 [00:32<00:00, 1321.74it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc0092a200>
+10/08 [16:42:02] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/roboturk/0.1.0/dataset_statistics_3aa821e17a2937f941d4102cfadcb1154853cb45dcec07ccc66893b01f6
+                          f1b40.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc1013b7c0>
+10/08 [16:42:03] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 1003/1003 [00:00<00:00, 1714.08it/s]
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf9480880>
+10/08 [16:42:04] INFO     | >> [*] Loading existing dataset statistics from                                                                                        data_utils.py:200
+                          /vast/users/xiaodan/zhangjian/datasets/OXE/ucsd_kitchen_dataset_converted_externally_to_rlds/0.1.0/dataset_statistics_1f1a5f310a2d5a6edc
+                          0e217370e135c8c8598290f11f57025037adcb0d033926.json.
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbfdb13af0>
+10/08 [16:42:05] INFO     | >> [*] Computing dataset statistics. This may take a bit, but should only need to happen once.                                         data_utils.py:227
+100%|██████████| 631/631 [00:00<00:00, 1276.86it/s]
+######################################################################################
+# Loading the following 18 datasets (incl. sampling weight):                         #
+# fractal20220817_data: ====================================================0.529250 #
+# jaco_play: ===============================================================0.010898 #
+# berkeley_cable_routing: ==================================================0.005916 #
+# viola: ===================================================================0.021337 #
+# berkeley_autolab_ur5: ====================================================0.027379 #
+# austin_buds_dataset_converted_externally_to_rlds: ========================0.004768 #
+# nyu_franka_play_dataset_converted_externally_to_rlds: ====================0.018817 #
+# furniture_bench_dataset_converted_externally_to_rlds: ====================0.055185 #
+# austin_sailor_dataset_converted_externally_to_rlds: ======================0.049354 #
+# austin_sirius_dataset_converted_externally_to_rlds: ======================0.039129 #
+# dlr_edan_shared_control_converted_externally_to_rlds: ====================0.001248 #
+# utaustin_mutex: ==========================================================0.050583 #
+# berkeley_fanuc_manipulation: =============================================0.017504 #
+# bc_z: ====================================================================0.168166 #
+# roboturk: ================================================================0.000131 #
+# toto: ====================================================================0.000228 #
+# ucsd_kitchen_dataset_converted_externally_to_rlds: =======================0.000006 #
+# iamlab_cmu_pickup_insert_converted_externally_to_rlds: ===================0.000102 #
+######################################################################################
+10/08 [16:42:06] INFO     | >> [*] Threads per Dataset: [14  1  1  1  1  1  1  2  1  1  1  1  1  5  1  1  1  1]                                                       dataset.py:563
+                 INFO     | >> [*] Reads per Dataset: [14  1  1  1  1  1  1  2  1  1  1  1  1  5  1  1  1  1]                                                         dataset.py:564
+                 INFO     | >> [*] Constructing datasets...                                                                                                           dataset.py:567
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc00840040>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc18089900>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc27699c90>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf9b33400>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc27701660>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc1003acb0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbfff5f9a0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc00373ac0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbffe0ad10>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbfdd10bb0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf893b7c0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf81a7370>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf80c0760>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf3fd0bb0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf3e98a30>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc27739960>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efbf947d4e0>
+214************** <tensorflow_datasets.core.read_only_builder.ReadOnlyBuilder object at 0x7efc107bc850>
+10/08 [16:42:11] INFO     | >> [*] Applying frame transforms on dataset...                                                                                            dataset.py:607
+****** after RLDSDataset initialization!
+****** length of the dataset: 7154275
+****** Build rlds train dataset: IterableDatasetWrapper successfully.
+****** path: None
+****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
+****** After build vla train dataset...
+****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7efc225c0040>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7efc22713f70>]
+****** Before build mixed iterable dataset...
+****** Build vla train dataloader successfully!
+************************* Build train_dataloader successful!
+************************* Before build_inf_evaluators
+                 WARNING  | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No       warnings.py:109
+                          device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
+                            warnings.warn(  # warn only once
+************************* Build evaluators successful!
+************************* Early exit flags: early_exit=False
+PROPRIO_DIM 16 does not match ACTION_DIM 16 for AffordVLA
+************************* Initialize model successful!
+***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
+***** Load checkpoint successful!
+missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
+unexpected keys: []
+************************* Initialize model successful!
+************************* Before FSDP model wrapping
+************************* FSDP model wrapping successful!
+************************* Before building optimizer and scheduler
+10/08 [16:43:46] INFO     | >> Constructing optimizer with 2 param groups                                                                                              optim.py:1283
+**************************************************
+After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
+************************* VLATrainer initialized successfully!
+************************* Before trainer.fit()
+Pre-train system metrics
+    System/Peak GPU Memory (MB)=35,614
+10/08 [16:43:56] WARNING  | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use    warnings.py:109
+                          sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
+                            timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
+!!!Training failed: stack expects each tensor to be equal size, but got [] at entry 0 and [1] at entry 1
+Traceback (most recent call last):
+  File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 593, in main
+    trainer.fit()
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 2284, in fit
+    for batch in self.train_loader:
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 734, in __next__
+    data = self._next_data()
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 790, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+  File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 43, in fetch
+    return self.collate_fn(data)
+  File "/vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py", line 201, in __call__
+    out['timestep'] = torch.stack(timestep_list, dim=0)
+RuntimeError: stack expects each tensor to be equal size, but got [] at entry 0 and [1] at entry 1
+wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,286 @@

+ai2-molmo==0.0.0
+astunparse==1.6.3
+flatbuffers==25.2.10
+gast==0.6.0
+google-pasta==0.2.0
+h5py==3.14.0
+libclang==18.1.1
+Markdown==3.9
+namex==0.1.0
+opt_einsum==3.4.0
+optree==0.17.0
+tensorboard-data-server==0.7.2
+tensorflow-io-gcs-filesystem==0.37.1
+termcolor==3.1.0
+Werkzeug==3.1.3
+Brotli==1.1.0
+Farama-Notifications==0.0.4
+MarkupSafe==2.1.5
+PyYAML==6.0.2
+absl-py==2.3.1
+accelerate==1.10.1
+ai2-molmo==0.0.0
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.10.0
+array_record==0.8.1
+async-timeout==5.0.1
+attrs==25.3.0
+av==15.1.0
+backports.tarfile==1.2.0
+beaker-gantry==3.2.0
+beaker-py==2.5.0
+black==23.12.1
+blinker==1.9.0
+boltons==25.0.0
+boto3==1.40.33
+botocore==1.40.33
+build==1.3.0
+cached_path==1.7.3
+cached-property==2.0.1
+cachetools==5.5.2
+certifi==2025.8.3
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.2.1
+click-help-colors==0.9.4
+click-option-group==0.5.7
+cloudpickle==3.1.1
+cmake==4.1.0
+contourpy==1.3.2
+cryptography==46.0.1
+cycler==0.12.1
+dataclass-extensions==0.2.3
+datasets==3.6.0
+decorator==5.2.1
+deepdiff==8.6.1
+diffusers==0.35.1
+dill==0.3.8
+distro==1.9.0
+dlimp==0.0.1
+dm-tree==0.1.9
+docutils==0.22.1
+draccus==0.10.0
+editdistance==0.8.1
+einops==0.8.1
+einops-exts==0.0.4
+et_xmlfile==2.0.0
+etils==1.13.0
+evdev==1.9.2
+exceptiongroup==1.3.0
+face==24.0.0
+fastapi==0.116.2
+ffmpy==0.6.1
+fiddle==0.3.0
+filelock==3.13.1
+Flask==3.1.2
+fonttools==4.60.0
+frozenlist==1.7.0
+fsspec==2023.9.2
+ftfy==6.3.1
+gcsfs==2023.9.2
+gitdb==4.0.12
+GitPython==3.1.45
+glom==24.11.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-auth-oauthlib==1.2.2
+google-cloud-core==2.4.3
+google-cloud-storage==2.19.0
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+gradio==5.46.0
+gradio_client==1.13.0
+graphviz==0.21
+groovy==0.1.2
+grpcio==1.75.0
+gymnasium==0.29.1
+h11==0.16.0
+hf_transfer==0.1.9
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.0
+id==1.5.0
+idna==3.10
+imageio==2.37.0
+imageio-ffmpeg==0.6.0
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+inquirerpy==0.3.4
+isort==5.12.0
+itsdangerous==2.2.0
+jaraco.classes==3.4.0
+jaraco.context==6.0.1
+jaraco.functools==4.3.0
+jeepney==0.9.0
+Jinja2==3.1.4
+jiter==0.11.0
+jmespath==1.0.1
+joblib==1.5.2
+jsonlines==4.0.0
+keras==2.15.0
+keyring==25.6.0
+kiwisolver==1.4.9
+latex2sympy2_extended==1.10.2
+lerobot==0.3.4
+Levenshtein==0.27.1
+libcst==1.8.4
+lightning-utilities==0.15.2
+markdown-it-py==4.0.0
+math-verify==0.8.0
+matplotlib==3.10.6
+mdurl==0.1.2
+mergedeep==1.3.4
+ml-dtypes==0.2.0
+ml_dtypes==0.5.3
+more-itertools==10.8.0
+mpmath==1.3.0
+msgspec==0.19.0
+multidict==6.6.4
+multiprocess==0.70.16
+mypy==1.3.0
+mypy_extensions==1.1.0
+necessary==0.4.3
+networkx==3.3
+nh3==0.3.0
+nltk==3.9.1
+numpy==1.26.4
+oauthlib==3.3.1
+omegaconf==2.3.0
+openai==1.108.0
+opencv-python-headless==4.12.0.88
+OpenEXR==3.4.0
+openpyxl==3.1.5
+orderly-set==5.5.0
+orjson==3.11.3
+packaging==25.0
+pandas==2.3.2
+pathspec==0.12.1
+petname==2.6
+pfzy==0.3.4
+pillow==11.0.0
+pip==25.2
+platformdirs==4.4.0
+pluggy==1.6.0
+promise==2.3
+prompt_toolkit==3.0.52
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==4.21.12
+protobuf==6.32.1
+psutil==7.1.0
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pynput==1.8.1
+pyparsing==3.2.4
+pyproject_hooks==1.2.0
+pyserial==3.5
+pytest==8.4.2
+pytest-sphinx==0.6.3
+python-dateutil==2.9.0.post0
+python-Levenshtein==0.27.1
+python-multipart==0.0.20
+python-xlib==0.33
+pytorch-triton-rocm==3.4.0
+pytz==2025.2
+pyyaml-include==1.4.1
+RapidFuzz==3.14.1
+readme_renderer==44.0
+regex==2025.9.1
+requests==2.32.5
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+requirements-parser==0.13.0
+rerun-sdk==0.22.1
+rfc3986==2.0.0
+rich==13.9.4
+rsa==4.9.1
+ruff==0.13.0
+s3transfer==0.14.0
+safehttpx==0.1.6
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.15.3
+SecretStorage==3.4.0
+semantic-version==2.10.0
+sentencepiece==0.2.1
+sentry-sdk==2.38.0
+setuptools==78.1.1
+shellingham==1.5.4
+six==1.17.0
+smart_open==7.3.1
+smashed==0.21.5
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.48.0
+sympy==1.13.3
+tensorboard==2.15.2
+tensorboard==2.19.0
+tensorflow==2.15.0
+tensorflow-addons==0.23.0
+tensorflow-datasets==4.9.3
+tensorflow-estimator==2.15.0
+tensorflow-graphics==2021.12.3
+tensorflow-metadata==1.17.2
+threadpoolctl==3.6.0
+timm==1.0.19
+tokenizers==0.22.0
+toml==0.10.2
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0+rocm6.4
+torchcodec==0.5
+torchmetrics==1.8.2
+torchvision==0.23.0+rocm6.4
+tqdm==4.67.1
+transformers==4.56.1
+trimesh==4.8.2
+trouting==0.3.3
+twine==6.2.0
+typeguard==2.13.3
+typer==0.17.4
+typing_extensions==4.15.0
+typing-inspect==0.9.0
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+wandb==0.21.4
+wcwidth==0.2.13
+websockets==15.0.1
+wheel==0.45.1
+wrapt==1.14.2
+xxhash==3.5.0
+yarl==1.20.1
+zipp==3.23.0
+lerobot==0.3.4
+minLoRA==0.1.0
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,204 @@

+{
+  "os":  "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-10-08T16:38:31.938958Z",
+  "args":  [
+    "qwen2_7b",
+    "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression",
+    "--vision_backbone",
+    "openai",
+    "--action_head",
+    "l1_regression",
+    "--seq_len",
+    "1600",
+    "--ft_llm",
+    "--checkpoint",
+    "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
+    "--device_train_microbatch_size",
+    "16",
+    "--global_batch_size",
+    "126",
+    "--dataset",
+    "vla_dataset_realworld",
+    "--llm_learning_rate",
+    "5e-5",
+    "--wandb_entity",
+    "henryeap",
+    "--wandb_project",
+    "a1-realworld",
+    "--wandb_run_name",
+    "cleandesk",
+    "--real_world_vla_config_path",
+    "vla_config_realworld/vla_config_cleandesk.yaml",
+    "--save_overwrite"
+  ],
+  "program":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
+  "codePath":  "launch_scripts/train_vla.py",
+  "codePathLocal":  "launch_scripts/train_vla.py",
+  "git":  {
+    "remote":  "https://github.com/Spatialtemporal-AI/A1.git",
+    "commit":  "49712a42d21a8c739a16ba5eeaec4a0d7b29ab80"
+  },
+  "email":  "ihenrykwok@outlook.com",
+  "root":  "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression/wandb",
+  "host":  "auh7-1b-gpu-316",
+  "executable":  "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "Instinct MI210",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "470343073792",
+      "used":  "50668195840"
+    }
+  },
+  "memory":  {
+    "total":  "2434606952448"
+  },
+  "gpu_amd":  [
+    {
+      "id":  "3",
+      "uniqueId":  "0x62b25d667064a7ff",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "6",
+      "uniqueId":  "0xbdb93fac1aa97618",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "7",
+      "uniqueId":  "0x850c5a1ff5d005be",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "2",
+      "uniqueId":  "0x7a3e2781f4182456",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "4",
+      "uniqueId":  "0x4c21a2ad76408df6",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "0",
+      "uniqueId":  "0xa8d2c33980704bf2",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "1",
+      "uniqueId":  "0xd13265721a117b54",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    },
+    {
+      "id":  "5",
+      "uniqueId":  "0x19ee82506963794b",
+      "vbiosVersion":  "113-D67301V-073",
+      "performanceLevel":  "auto",
+      "maxPower":  "300.0",
+      "series":  "Instinct MI210",
+      "model":  "0x740f",
+      "vendor":  "Advanced Micro Devices, Inc. [AMD/ATI]",
+      "sku":  "D67301V",
+      "sclkRange":  "500Mhz - 1700Mhz",
+      "mclkRange":  "400Mhz - 1600Mhz"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "ai-04r",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "128",
+    "gpus_on_node":  "8",
+    "gtids":  "0",
+    "job_account":  "faculty-acc",
+    "job_cpus_per_node":  "128",
+    "job_end_time":  "1760200645",
+    "job_gid":  "2000",
+    "job_gpus":  "0,1,2,3,4,5,6,7",
+    "job_id":  "2282",
+    "job_name":  "mh_cleandesk_l1_regression",
+    "job_nodelist":  "auh7-1b-gpu-316",
+    "job_num_nodes":  "1",
+    "job_partition":  "faculty",
+    "job_qos":  "xdqos",
+    "job_start_time":  "1759941445",
+    "job_uid":  "2013",
+    "job_user":  "xiaodan",
+    "jobid":  "2282",
+    "localid":  "0",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "auh7-1b-gpu-316",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
+    "submit_host":  "auh-1b-cpu-login-001",
+    "task_pid":  "1925818",
+    "tasks_per_node":  "1",
+    "topology_addr":  "auh7-1b-gpu-316",
+    "topology_addr_pattern":  "node"
+  },
+  "writerId":  "9zghejqbkg668a368vduhoyzhbv4wgq6"
+}

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_step":0,"System/Peak GPU Memory (MB)":35614.78125,"_timestamp":1.7599418260827e+09,"_wandb":{"runtime":325},"_runtime":325.049768384}

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-10-08T16:38:32.182353704Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpl72x4viw/port-1926008.txt","pid":1926008,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-10-08T16:38:32.183512473Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1926008}
+{"time":"2025-10-08T16:38:32.184652392Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1926008-1926185-2566029014/socket","Net":"unix"}}
+{"time":"2025-10-08T16:38:32.187758092Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-10-08T16:38:32.198021978Z","level":"INFO","msg":"handleInformInit: received","streamId":"76mxu43t","id":"1(@)"}
+{"time":"2025-10-08T16:38:33.339879992Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"76mxu43t","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.126902419Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"76mxu43t","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.130282224Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"76mxu43t","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.192589494Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.192627935Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.192634215Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-10-08T16:44:00.192643625Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.192695596Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.192701166Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-10-08T16:44:00.192695596Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1926008-1926185-2566029014/socket","Net":"unix"}}
+{"time":"2025-10-08T16:44:00.192730717Z","level":"INFO","msg":"server is closed"}

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-10-08T16:38:32.19998745Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-08T16:38:33.339827981Z","level":"INFO","msg":"stream: created new stream","id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339874102Z","level":"INFO","msg":"stream: started","id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339905492Z","level":"INFO","msg":"handler: started","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339893552Z","level":"INFO","msg":"writer: started","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:38:33.339947873Z","level":"INFO","msg":"sender: started","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:43:58.756754711Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.006574606}],"total_operations":1}}
+{"time":"2025-10-08T16:43:59.766243448Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-10-08T16:44:00.127335826Z","level":"INFO","msg":"stream: closing","id":"76mxu43t"}
+{"time":"2025-10-08T16:44:00.127349836Z","level":"INFO","msg":"handler: closed","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:44:00.128408003Z","level":"INFO","msg":"sender: closed","stream_id":"76mxu43t"}
+{"time":"2025-10-08T16:44:00.128424754Z","level":"INFO","msg":"stream: closed","id":"76mxu43t"}

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/logs/debug.log ADDED Viewed

File without changes

cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/run-76mxu43t.wandb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:592c2180c1f58861cad10bcce3042c15f3fa428cb470d5da0ac15865d108e860
+size 205758

eraser_flow_matching/step11500-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
+size 1331

eraser_flow_matching/step12000-action-head/metadata.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:995307502120af3866f237cd0bc484fc848a652539d28e53cbea882abc16ba6b
+size 1331

eraser_flow_matching/step12000-unsharded/config.yaml ADDED Viewed

	@@ -0,0 +1,322 @@

+run_name: eraser_20251011_163756
+seed: 6198
+epoch: null
+dry_run: false
+model:
+  d_model: 3584
+  n_heads: 28
+  n_kv_heads: 4
+  qkv_bias: true
+  clip_qkv: null
+  n_layers: 28
+  mlp_ratio: 4
+  mlp_hidden_size: 37888
+  activation_type: swiglu
+  block_type: sequential
+  block_group_size: 1
+  rope: true
+  rope_full_precision: true
+  rope_theta: 1000000.0
+  vision_backbone:
+    image_model_type: openai
+    image_default_input_size:
+    - 336
+    - 336
+    image_patch_size: 14
+    image_pos_patch_size: 14
+    image_emb_dim: 1024
+    image_num_heads: 16
+    image_num_key_value_heads: 16
+    image_num_layers: 23
+    image_head_dim: 64
+    image_mlp_dim: 4096
+    image_mlp_activations: quick_gelu
+    image_dropout_rate: 0.0
+    image_num_pos: 577
+    image_norm_eps: 1.0e-05
+    attention_dropout: 0.0
+    residual_dropout: 0.0
+    initializer_range: 0.02
+    fsdp_wrap: false
+    resize_mode: default
+  vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
+  llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
+  low_cpu_fsdp: true
+  attention_type: sdpa
+  float32_attention: true
+  attention_dropout: 0.0
+  attention_layer_norm: false
+  residual_dropout: 0.1
+  response_residual_dropout: 0.0
+  embedding_dropout: 0.0
+  layer_norm_type: rms
+  layer_norm_with_affine: true
+  layer_norm_eps: 1.0e-06
+  attention_layer_norm_with_affine: true
+  max_sequence_length: 4096
+  max_position_embeddings: null
+  include_bias: false
+  bias_for_layer_norm: null
+  scale_logits: false
+  vocab_size: 152064
+  embedding_size: 152064
+  ff_out_size: null
+  additional_vocab_size: 128
+  new_embedding_init_range: 0.02
+  weight_tying: false
+  init_device: null
+  init_fn: normal
+  init_std: 0.02
+  init_cutoff_factor: null
+  norm_after: false
+  precision: amp_bf16
+  max_crops: 12
+  crop_mode: overlap-and-resize-c2
+  use_col_tokens: true
+  prompt_type: uber_model
+  system_prompt_kind: demo_or_style
+  message_formatting: role
+  always_start_with_space: true
+  multi_annotation_weighting: root_subsegments
+  default_inference_len: 65
+  overlap_margins:
+  - 4
+  - 4
+  pad_value: 0.0
+  image_padding_embed: pad_and_partial_pad
+  fix_image_padding: true
+  vit_layers:
+  - -2
+  - -9
+  image_pooling_h: 2
+  image_pooling_w: 2
+  image_pooling_2d: attention_meanq
+  image_projector: mlp
+  image_feature_dropout: 0.0
+  initializer_range: 0.02
+  normalize_input_embeds: false
+  use_position_ids: true
+  head_dim: null
+  action_tokenizer:
+    identifier: physical-intelligence/fast
+    tokenizer_dir: null
+  action_dim: 7
+  horizon: 8
+  tokenizer:
+    identifier: Qwen/Qwen2-7B
+    tokenizer_dir: null
+  pad_tokenizer: true
+  moe_num_experts: 8
+  moe_top_k: 2
+  moe_mlp_impl: sparse
+  moe_log_expert_assignment: false
+  moe_shared_expert: false
+  moe_lbl_in_fp32: false
+  moe_interleave: false
+  moe_loss_weight: 0.1
+  moe_zloss_weight: null
+  moe_dropless: true
+  moe_capacity_factor: 1.25
+  action_head: flow_matching
+  num_diffusion_steps: 1000
+  num_diffusion_inference_steps: 30
+  use_proprio: true
+  action_head_dit_hidden_size: 1152
+  action_head_dit_depth: 28
+  action_head_dit_num_heads: 16
+  llm_causal_attention: false
+  action_use_left_eef: true
+  action_use_mobile_base: false
+allow_resume: false
+ft_llm: true
+ft_vit: false
+ft_connector: false
+ft_embedding: lm_head
+lora: false
+use_lora: false
+lora_rank: 8
+lora_llm: false
+lora_vit: false
+lora_connector: false
+early_exit: false
+train_exit_random_layer: false
+optimizer:
+  name: adamw
+  learning_rate: 0.0001
+  weight_decay: 0.01
+  betas:
+  - 0.9
+  - 0.95
+  eps: 1.0e-05
+  connector_learning_rate: 0.0002
+  vit_learning_rate: 6.0e-06
+  llm_learning_rate: 5.0e-05
+  connector_weight_decay: 0.0
+  vit_weight_decay: 0.0
+  llm_weight_decay: 0.0
+  connector_betas:
+  - 0.9
+  - 0.95
+  vit_betas:
+  - 0.9
+  - 0.95
+  llm_betas:
+  - 0.9
+  - 0.95
+  connector_eps: 1.0e-06
+  vit_eps: 1.0e-06
+  llm_eps: 1.0e-06
+  metrics_log_interval: 20
+scheduler:
+  name: multimodal
+  units: steps
+  t_warmup: 100
+  t_max: null
+  alpha_f: 0.1
+  connector_t_warmup: 200
+  vit_t_warmup: 2000
+  llm_t_warmup: 2000
+  grad_clip_warmup_steps: null
+  grad_clip_warmup_factor: null
+  warmup_min_lr: 0.0
+data:
+  dataset: vla_dataset_realworld
+  mixture: null
+  root_size_mixture: null
+  split: train
+  seed: 95818
+  shuffle_messages: false
+  pad: to_max
+  sequence_length: 1600
+  shuffle: true
+  for_inference: false
+  multi_modal: torch
+  num_workers: 0
+  drop_last: true
+  pin_memory: true
+  prefetch_factor: null
+  persistent_workers: false
+  timeout: 0
+  rlds_dataset_name: a1_real_world
+  rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
+  use_wrist_image: true
+  use_proprio: true
+  rlds_shuffle_buffer_size: 100000
+  rlds_traj_threads: 8
+  rlds_read_threads: 8
+  lerobot_episode_index_start: null
+  lerobot_episode_index_end: null
+restore_dataloader: true
+fast_forward_batches: null
+evaluators:
+- label: val
+  data:
+    dataset: vla_dataset_realworld
+    mixture: null
+    root_size_mixture: null
+    split: validation
+    seed: null
+    shuffle_messages: false
+    pad: to_max
+    sequence_length: 1600
+    shuffle: false
+    for_inference: false
+    multi_modal: torch
+    num_workers: 0
+    drop_last: true
+    pin_memory: true
+    prefetch_factor: null
+    persistent_workers: true
+    timeout: 0
+    rlds_dataset_name: libero_4_task_suites_no_noops
+    rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
+    use_wrist_image: true
+    use_proprio: true
+    rlds_shuffle_buffer_size: 256000
+    rlds_traj_threads: 8
+    rlds_read_threads: 8
+    lerobot_episode_index_start: 353
+    lerobot_episode_index_end: 765
+  device_eval_batch_size: null
+  subset_num_batches: 64
+  max_examples: null
+  max_new_tokens: 448
+  mm_evaluator: null
+  save_dir: null
+  save_to_checkpoint_dir: false
+  eval_name: null
+  skip_if_metrics_cached: true
+eval_interval: 0
+inf_eval_interval: -1
+inf_evaluators: []
+save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_flow_matching
+remote_save_folder: null
+canceled_check_interval: 50
+save_interval: 500
+save_interval_unsharded: 500
+save_interval_ephemeral: null
+save_interval_action_head: 500
+save_num_checkpoints_to_keep: 1
+save_num_unsharded_checkpoints_to_keep: 1
+save_num_action_head_checkpoints_to_keep: 2
+save_overwrite: true
+force_save_unsharded: false
+no_pre_train_checkpoint: true
+initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_model_config: null
+checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
+load_path: null
+load_path_sharded_checkpointer: null
+reset_optimizer_state: false
+reset_trainer_state: false
+save_dataloader_state: false
+reset_dataloader_state: false
+keep_lr_on_load: true
+sharded_checkpointer: torch_legacy
+max_duration: 500000
+global_train_batch_size: 126
+device_train_batch_size: 15
+device_train_microbatch_size: 16
+device_eval_batch_size: 4
+eval_subset_num_batches: -1
+eval_on_load: false
+device_inf_eval_batch_size: 16
+inf_eval_subset_num_batches: -1
+device_train_grad_accum: 0
+max_grad_norm: 1.0
+multi_component_grad_norm: true
+batch_divisor: global_batch
+max_grad_norm_ratio: null
+precision: amp_bf16
+wandb:
+  project: a1-realworld
+  entity: henryeap
+  group: null
+  name: eraser_20251011_163756
+  tags:
+  - watching
+  log_artifacts: false
+  rank_zero_only: true
+  log_interval: 1
+speed_monitor:
+  window_size: 20
+  gpu_flops_available: null
+console_log_interval: 1
+gen1_gc_interval: 1
+compile: null
+fsdp:
+  use_orig_params: true
+  sharding_strategy: FULL_SHARD
+  wrapping_strategy: by_block_and_size
+  precision: float
+  hybrid_sharding_num_model_replicas: null
+softmax_auxiliary_loss: true
+softmax_auxiliary_loss_scale: 0.0001
+time_limit: null
+extra_steps_after_cancel: 10
+python_profiling: false
+torch_profiling: false
+stop_at: 500000
+stop_after: null
+activation_checkpointing: whole_layer
+fused_loss: null

eraser_flow_matching/step12000-unsharded/train.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27b7944a86ee4e879d03cec156d2128c480602778b651762977174d39e5f94ab
+size 15061

eraser_flow_matching/wandb/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,8 @@

+{"time":"2025-10-11T16:38:32.587302182Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
+{"time":"2025-10-11T16:38:33.732885593Z","level":"INFO","msg":"stream: created new stream","id":"yqnt28c8"}
+{"time":"2025-10-11T16:38:33.732932354Z","level":"INFO","msg":"stream: started","id":"yqnt28c8"}
+{"time":"2025-10-11T16:38:33.732959824Z","level":"INFO","msg":"writer: started","stream_id":"yqnt28c8"}
+{"time":"2025-10-11T16:38:33.732985135Z","level":"INFO","msg":"handler: started","stream_id":"yqnt28c8"}
+{"time":"2025-10-11T16:38:33.732961384Z","level":"INFO","msg":"sender: started","stream_id":"yqnt28c8"}
+{"time":"2025-10-13T08:15:22.219814038Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-10-13T20:01:35.03758236Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}

eraser_flow_matching/wandb/wandb/debug.log ADDED Viewed

File without changes