run_id: 0911_libero_object_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k
run_root_dir: ./playground/Checkpoints
seed: 42
trackers:
  - jsonl
  - wandb
wandb_entity: michaelyu-1101-fudanuniversity
wandb_project: Internvla
is_debug: false
framework:
  framework_py: DinoQFormerACT
  qwenvl:
    base_vlm: /mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000/
    attn_implementation: flash_attention_2
    vl_hidden_dim: 2048
  dino:
    dino_backbone: dinov2_vitl14
  layer_qformer:
    qformer_end_layer: 37
    qformer_start_layer: 36
    num_query_tokens: 64
    grad_scale: 0.5
  action_model:
    action_model_type: DiT-B
    action_hidden_dim: 768
    action_dim: 7
    use_ema: false
    future_action_window_size: 7
    past_action_window_size: 0
    repeated_diffusion_steps: 8
    reduce_in_full_precision: true
datasets:
  vlm_data:
    dataformat: llava_json
    dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
    eval_dataset: aokvqa_cauldron_llava_format
    data_flatten: false
    base_interval: 2
    max_pixels: 50176
    min_pixels: 784
    fix_image_size:
      - 224
      - 224
    model_max_length: 1024
    model_type: qwen2.5vl
    per_device_batch_size: 4
  vla_data:
    dataset_py: lerobot_libero
    data_root_dir: playground/Datasets/LEROBOT_LIBERO_DATA
    data_mix: libero_object
    action_type: delta_qpos
    CoT_prompt: Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.
    CoT_answer: bbox
    default_image_resolution:
      - 3
      - 224
      - 224
    per_device_batch_size: 16
    load_all_data_for_training: true
    obs:
      - image_0
trainer:
  epochs: 100
  max_train_steps: 100000
  num_warmup_steps: 5000
  save_interval: 10000
  eval_interval: 1000
  learning_rate:
    base: 2.5e-05
  lr_scheduler_type: cosine_with_min_lr
  scheduler_specific_kwargs:
    min_lr: 1.0e-06
  freeze_modules: ''
  loss_scale:
    vla: 1.0
    vlm: 0.1
  max_grad_norm: 1.0
  warmup_ratio: 0.1
  weight_decay: 0.0
  logging_frequency: 10
  gradient_clipping: 1.0
  gradient_accumulation_steps: 1
  optimizer:
    name: AdamW
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
is_resume: false
resume_epoch: null
resume_step: null
enable_gradient_checkpointing: true
enable_mixed_precision_training: true
output_dir: ./playground/Checkpoints/0911_libero_object_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_8_pretrained_vlm_20k
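
A config dumped in this format can be consumed with standard YAML tooling. Below is a minimal sketch, assuming the block above is saved as run_config.yaml (a hypothetical filename), that PyYAML is installed, and that the nesting shown above is accurate; the project's actual loader and schema may differ.

# Minimal sketch: load the run config and sanity-check a few derived fields.
import os
import yaml

with open("run_config.yaml") as f:  # hypothetical filename for the dump above
    cfg = yaml.safe_load(f)

# output_dir is expected to be run_root_dir joined with run_id.
expected = os.path.join(cfg["run_root_dir"], cfg["run_id"])
assert cfg["output_dir"].rstrip("/") == expected.rstrip("/")

# Action chunking: the policy predicts the current action plus 7 future steps.
chunk = cfg["framework"]["action_model"]["future_action_window_size"] + 1
print(f"action chunk size: {chunk}")  # 8, matching "action_chunk_8" in run_id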