# IffYuan
# /
#
# Embodied-R1.5-VLA-SIMPLER
#
# Model card | Files and versions
#
# Embodied-R1.5-VLA-SIMPLER / config.yaml
#
# IffYuan's picture
#
# Upload config.yaml with huggingface_hub
#
# d1a238c verified 15 days ago
#
# history blame contribute delete
#
# 1.89 kB

	datasets:
	vla_data:
	CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
	Locate their bounding boxes in [x1,y1,x2,y2] format.
	data_mix: bridge_rt_1
	data_root_dir: ./playground/Datasets/OXE_LEROBOT
	dataset_py: lerobot_datasets
	image_size:
	- 224
	- 224
	per_device_batch_size: 8
	video_backend: torchvision_av
	framework:
	action_model:
	action_dim: 7
	action_horizon: 16
	action_model_type: DiT-B
	add_pos_embed: true
	diffusion_model_cfg:
	cross_attention_dim: 4096
	dropout: 0.2
	final_dropout: true
	interleave_self_attention: true
	norm_type: ada_norm
	num_layers: 16
	output_dim: 1024
	positional_embeddings: null
	future_action_window_size: 15
	hidden_size: 1024
	max_seq_len: 1024
	noise_beta_alpha: 1.5
	noise_beta_beta: 1.0
	noise_s: 0.999
	num_inference_timesteps: 4
	num_target_vision_tokens: 32
	num_timestep_buckets: 1000
	past_action_window_size: 0
	state_dim: 7
	name: QwenGR00T
	qwenvl:
	base_vlm: /mnt/18T/starVLAproject/Qwen3-VL-8B-Instruct
	output_dir: /starvla/Checkpoints/qwen3vl_bridge_rt1_QwenGR00T_2node_0203_1256
	run_id: qwen3vl_bridge_rt1_QwenGR00T_2node_0203_1256
	run_root_dir: /starvla/Checkpoints
	seed: 42
	trainer:
	eval_interval: 500
	freeze_modules: true
	gradient_accumulation_steps: 1
	gradient_clipping: 1.0
	is_resume: false
	learning_rate:
	action_model: 0.0001
	base: 1.0e-05
	qwen_vl_interface: 1.0e-05
	logging_frequency: 50
	lr_scheduler_type: cosine_with_min_lr
	max_train_steps: 100000
	num_warmup_steps: 10000
	optimizer:
	betas:
	- 0.9
	- 0.95
	eps: 1.0e-08
	weight_decay: 1.0e-08
	repeated_diffusion_steps: 4
	save_interval: 10000
	scheduler_specific_kwargs:
	min_lr: 5.0e-07
	wandb_entity: xiguapi
	wandb_project: Qwen3VL_Bridge_RT1_QwenGR00T