Upload MolmoBot RBY1 DoorOpening weights (step56000 unsharded)

94e9157 verified 6 days ago

19.8 kB

	run_name: molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
	model:
	model_name: molmoact
	data_formatter:
	prompt_templates: uber_model_v2
	message_format: qwen3
	system_prompt: demo_or_style_v2
	always_start_with_space: false
	default_inference_len: 65
	select_answer: best
	debug: false
	image_last: false
	format_message_list: null
	p_one_message: 0.0
	eval_system_prompt_mapping: null
	p_choice_content_in_mc: 1.0
	template_video_mc_questions: true
	pointing_format: html-v2
	points_decimal_places: 1
	use_seperate_non_pointing_qa_style: false
	timestamp_mode: 50-percent-seconds
	output_timestamp_mode: seconds
	seconds_decimal_places: 1
	p_multi_point_all_image: 0.5
	use_seperate_count_without_pointing_style: false
	sample_random_initial_point: true
	llm:
	d_model: 2560
	n_heads: 32
	n_kv_heads: 8
	head_dim: 128
	qkv_bias: false
	clip_qkv: null
	n_layers: 36
	mlp_ratio: 4
	mlp_hidden_size: 19456
	activation_type: swiglu
	block_type: sequential
	rope: true
	rope_full_precision: true
	rope_theta: 5000000.0
	rope_type: default
	rope_factor: null
	rope_high_freq_factor: null
	rope_low_freq_factor: null
	rope_original_max_position_embeddings: null
	rope_attention_factor: null
	rope_beta_fast: null
	rope_beta_slow: null
	rope_mscale: null
	rope_mscale_all_dim: null
	rope_truncate: null
	attention_type: sdpa
	full_attention_layers: null
	sliding_attention_rope_scaling: false
	float32_attention: true
	attention_dropout: 0.0
	attention_layer_norm: true
	attention_layer_norm_type: qwen3
	residual_dropout: 0.1
	response_residual_dropout: 0.0
	layer_norm_type: rms
	layer_norm_with_affine: true
	layer_norm_eps: 1.0e-06
	attention_layer_norm_with_affine: true
	max_sequence_length: 8192
	max_position_embeddings: null
	include_bias: false
	bias_for_layer_norm: null
	norm_after: false
	moe_num_experts: 8
	moe_top_k: 2
	moe_mlp_impl: sparse
	moe_log_expert_assignment: false
	moe_shared_expert: false
	moe_lbl_in_fp32: false
	moe_interleave: false
	moe_loss_weight: 0.1
	moe_zloss_weight: null
	moe_dropless: true
	moe_capacity_factor: 1.25
	embedding_dropout: 0.0
	scale_logits: false
	vocab_size: 151936
	additional_vocab_size: 128
	weight_tying: true
	embedding_size: 151936
	use_position_ids: true
	tokenizer:
	identifier: Qwen/Qwen3-4B-Instruct-2507
	tokenizer_dir: null
	init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-4b-instruct.pt
	init_incremental: null
	new_embedding_init_range: 0.02
	initializer_range: 0.02
	normalize_input_embeds: false
	activation_checkpoint: whole_layer
	compile: blocks
	fix_pad_tokenizer: false
	init_std: 0.02
	init_fn: normal
	init_cutoff_factor: null
	vision_backbone:
	vit:
	image_model_type: siglip
	image_default_input_size:
	- 378
	- 378
	image_patch_size: 14
	image_pos_patch_size: 14
	image_emb_dim: 1152
	image_num_heads: 16
	image_num_key_value_heads: 16
	image_num_layers: 27
	image_head_dim: 72
	image_mlp_dim: 4304
	image_mlp_activations: gelu_pytorch_tanh
	image_dropout_rate: 0.0
	image_num_pos: 729
	image_norm_eps: 1.0e-06
	attention_dropout: 0.0
	residual_dropout: 0.0
	initializer_range: 0.02
	float32_attention: true
	attention_type: sdpa
	sdpa_backend: all
	activation_checkpointing: true
	init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
	resize_mode: siglip
	pad_value: 0.0
	normalize: siglip
	image_pooling_2d: attention_meanq
	pooling_attention_mask: true
	image_projector: mlp
	image_padding_embed: null
	vit_layers:
	- -3
	- -9
	skip_unused_layers: true
	use_deepstack: false
	share_connector: false
	image_feature_dropout: 0.0
	connector_activation_checkpointing: true
	compile_vit: blocks
	pool_size_embeds: null
	compile_connector: null
	normalize_on_gpu: true
	use_image_augmentation: true
	use_resize_bottleneck: false
	mm_preprocessor:
	max_answer_len: null
	last_message_loss_only: false
	max_text_tokens: null
	loss_token_weighting: root_subsegments_root_tokens
	max_frames: 1
	frame_sample_mode: uniform_last_frame
	candidate_sampling_fps:
	- 0.25
	- 0.5
	- 1.0
	- 2.0
	- 4.0
	- 6.0
	- 8.0
	- 16.0
	cache_videos: true
	loading_method: torchcodec_exact
	max_fps:
	- 2.0
	time_sampling: true
	time_mode: per-frame-compact
	subtitle_mode: frame_1
	max_crops: 1
	overlap_margins:
	- 4.0
	- 4.0
	use_col_tokens: false
	periodic_high_res_frame: null
	high_low_train_mode: local_rnd
	high_res_frame_sample_options: null
	periodic_sample_rate_training:
	4:
	- 0.9
	- 0.03
	- 0.03
	- 0.04
	3:
	- 0.6
	- 0.2
	- 0.2
	skip_low_res_in_high_low: false
	pooling_w: 3
	pooling_h: 3
	high_res_pooling_w: null
	high_res_pooling_h: null
	query_based_resolution_selection: false
	max_queries_for_resolution_selection: 8
	use_frame_special_tokens: true
	frame_sel_clip_identifier: google/siglip2-so400m-patch14-384
	image_padding_mask: false
	max_subtitle_tokens: null
	image:
	crop_mode: resize
	use_col_tokens: true
	max_crops: 8
	high_res_max_crops: 24
	p_high_res: 0.0
	pooling_w: 2
	pooling_h: 2
	overlap_margins:
	- 4
	- 4
	max_images: 5
	max_multi_image_crops: 8
	multi_image_pooling_w: 2
	multi_image_pooling_h: 2
	use_single_crop_col_tokens: false
	use_single_crop_start_token: true
	topk: null
	prune_from_frame: 0
	bi_directional_attn: image_tokens
	shared_low_high_embedding: true
	debug: null
	cp_enabled: false
	apply_cp_to_vision_backbone: false
	action_dim: 20
	action_horizon: 16
	n_action_steps: 8
	n_obs_steps: 1
	action_expert:
	max_horizon: 32
	action_dim: 20
	hidden_size: 768
	num_layers: 36
	num_heads: 8
	mlp_ratio: 4.0
	timestep_embed_dim: 256
	dropout: 0.0
	attn_dropout: 0.0
	context_layer_norm: true
	action_expert_layer_mode: per_layer
	flow_matching_num_steps: 10
	flow_matching_cutoff: 0.999
	flow_matching_beta_alpha: 1.0
	flow_matching_beta_beta: 1.5
	num_flow_timestamps: 8
	same_noise_per_time: false
	robot_preprocessor:
	stats_by_repo:
	synthmanip:
	observation.state:
	min:
	- -4.904874324798584
	- -4.564780235290527
	- -3.5160739421844482
	- -2.356419563293457
	- -0.47234979271888733
	- -2.0865397453308105
	- -3.343071222305298
	- -5.8824052810668945
	- -1.7488995790481567
	- -2.967109203338623
	- -0.11299018561840057
	- -2.3546268939971924
	- -3.1416664123535156
	- -2.0946199893951416
	- -3.2890703678131104
	- -6.282893657684326
	- -1.7483078241348267
	- -2.967064142227173
	- -0.12049419432878494
	- -1.778153419494629
	- -1.7587945461273193
	- -1.5871200561523438
	max:
	- 17.08185577392578
	- 33.73189163208008
	- 3.2411913871765137
	- 2.356658697128296
	- 3.1416971683502197
	- 2.1008245944976807
	- 0.07229717075824738
	- 6.270575523376465
	- 2.0102994441986084
	- 2.9668161869049072
	- 0.021467044949531555
	- 2.3977394104003906
	- 0.34489157795906067
	- 2.0900635719299316
	- 0.07242166996002197
	- 6.27663516998291
	- 2.0076160430908203
	- 2.9636759757995605
	- 0.04509617015719414
	- 0.919683575630188
	- 1.6717331409454346
	- 1.1039749383926392
	action:
	q01:
	- -0.04400388523936272
	- -0.044572047889232635
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.037506889551877975
	- -0.03562070056796074
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.04800133779644966
	- -0.05000000074505806
	- -100.0
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.04927435144782066
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.0456085205078125
	- -0.05000000074505806
	- -100.0
	- -0.025820335373282433
	q99:
	- 0.04579437896609306
	- 0.04565873369574547
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.03847877308726311
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 100.0
	- 0.05000000074505806
	- 0.03608553484082222
	- 0.04896605759859085
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 100.0
	- 0.7379999756813049
	default_repo_id: synthmanip
	action_key: action
	state_keys:
	- observation.state
	action_norm_mode: quantiles
	state_norm_mode: min_max
	robot_postprocessor:
	stats_by_repo:
	synthmanip:
	observation.state:
	min:
	- -4.904874324798584
	- -4.564780235290527
	- -3.5160739421844482
	- -2.356419563293457
	- -0.47234979271888733
	- -2.0865397453308105
	- -3.343071222305298
	- -5.8824052810668945
	- -1.7488995790481567
	- -2.967109203338623
	- -0.11299018561840057
	- -2.3546268939971924
	- -3.1416664123535156
	- -2.0946199893951416
	- -3.2890703678131104
	- -6.282893657684326
	- -1.7483078241348267
	- -2.967064142227173
	- -0.12049419432878494
	- -1.778153419494629
	- -1.7587945461273193
	- -1.5871200561523438
	max:
	- 17.08185577392578
	- 33.73189163208008
	- 3.2411913871765137
	- 2.356658697128296
	- 3.1416971683502197
	- 2.1008245944976807
	- 0.07229717075824738
	- 6.270575523376465
	- 2.0102994441986084
	- 2.9668161869049072
	- 0.021467044949531555
	- 2.3977394104003906
	- 0.34489157795906067
	- 2.0900635719299316
	- 0.07242166996002197
	- 6.27663516998291
	- 2.0076160430908203
	- 2.9636759757995605
	- 0.04509617015719414
	- 0.919683575630188
	- 1.6717331409454346
	- 1.1039749383926392
	action:
	q01:
	- -0.04400388523936272
	- -0.044572047889232635
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.037506889551877975
	- -0.03562070056796074
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.04800133779644966
	- -0.05000000074505806
	- -100.0
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.04927435144782066
	- -0.05000000074505806
	- -0.05000000074505806
	- -0.0456085205078125
	- -0.05000000074505806
	- -100.0
	- -0.025820335373282433
	q99:
	- 0.04579437896609306
	- 0.04565873369574547
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.03847877308726311
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 100.0
	- 0.05000000074505806
	- 0.03608553484082222
	- 0.04896605759859085
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 0.05000000074505806
	- 100.0
	- 0.7379999756813049
	default_repo_id: synthmanip
	action_key: action
	state_keys:
	- observation.state
	action_norm_mode: quantiles
	state_norm_mode: min_max
	parallelism:
	data_parallel_replicate_degree: 1
	enable_compiled_autograd: false
	data_parallel_shard_degree: -1
	fsdp_reshard_after_forward: default
	context_parallel_config:
	degree: 1
	attention_type: ulysses
	load_balancer: ulysses
	head_stride: 1
	tensor_parallel_config:
	degree: 1
	enable_async: false
	data_parallel_config:
	name: fsdp
	param_dtype: null
	reduce_dtype: float32
	num_replicas: null
	shard_degree: null
	wrapping_strategy: full
	prefetch_factor: 0
	context_parallel_rotate_method: allgather
	seed: 6198
	epoch: null
	dry_run: false
	ft_llm: true
	ft_vit: false
	ft_connector: false
	ft_embedding: lm_head
	optimizer:
	name: adamw
	learning_rate: 0.0001
	weight_decay: 0.01
	betas:
	- 0.9
	- 0.95
	eps: 1.0e-05
	connector_learning_rate: 5.0e-06
	vit_learning_rate: 5.0e-06
	llm_learning_rate: 1.0e-05
	frame_selector_learning_rate: 0.0001
	temporal_token_scorer_learning_rate: 0.0001
	action_expert_learning_rate: 0.0001
	connector_weight_decay: 0.0
	vit_weight_decay: 0.0
	llm_weight_decay: 0.0
	frame_selector_weight_decay: 0.01
	temporal_token_scorer_weight_decay: 0.01
	action_expert_weight_decay: 0.0
	connector_betas:
	- 0.9
	- 0.95
	vit_betas:
	- 0.9
	- 0.95
	llm_betas:
	- 0.9
	- 0.95
	frame_selector_betas:
	- 0.9
	- 0.95
	temporal_token_scorer_betas:
	- 0.9
	- 0.95
	action_expert_betas:
	- 0.9
	- 0.95
	connector_eps: 1.0e-06
	vit_eps: 1.0e-06
	llm_eps: 1.0e-06
	frame_selector_eps: 1.0e-06
	temporal_token_scorer_eps: 1.0e-06
	action_expert_eps: 1.0e-06
	metrics_log_interval: -1
	scheduler:
	name: multimodal
	units: steps
	t_warmup: 100
	t_max: null
	alpha_f: 0.1
	connector_t_warmup: 200
	vit_t_warmup: 200
	llm_t_warmup: 2000
	frame_selector_t_warmup: 200
	temporal_token_scorer_t_warmup: 200
	action_expert_t_warmup: 200
	grad_clip_warmup_steps: null
	grad_clip_warmup_factor: null
	warmup_min_lr: 0.0
	data:
	dataset: null
	mixture:
	synthmanip/task_0: 1.0
	synthmanip/task_1: 1.0
	root_size_mixture: null
	kwargs_mixture: null
	split: train
	seed: 50189
	pad: to_max
	sequence_length: 1024
	max_text_seq_len: null
	shuffle: true
	start_index: 0
	packing: null
	enable_variable_sized_token_pooling: true
	num_workers: 4
	drop_last: true
	pin_memory: true
	prefetch_factor: 4
	persistent_workers: false
	timeout: 300
	action_data: null
	action_loader_rate: null
	action_batch_interval: 1
	restore_dataloader: true
	fast_forward_batches: null
	evaluators: []
	eval_interval: 0
	inf_evaluators: []
	inf_eval_interval: 1000
	eval_on_last_step: true
	eval_on_load: false
	eval_on: []
	save_folder: /weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
	checkpointer_config:
	save_thread_count: null
	load_thread_count: null
	pre_download: false
	work_dir: null
	throttle_uploads: false
	canceled_check_interval: 50
	save_interval: 4000
	save_at: null
	save_final_optim: false
	save_num_checkpoints_to_keep: 3
	checkpoint_retention_frequency: 10000
	save_final_unsharded_checkpoint: false
	save_interval_ephemeral: null
	save_overwrite: true
	load_path: null
	reset_optimizer_state: false
	reset_trainer_state: false
	initial_model_checkpoint: /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/
	allow_resume: true
	max_duration: 100000
	global_train_batch_size: 1024
	device_train_microbatch_size: 8
	max_grad_norm: 1.0
	multi_component_grad_norm: true
	batch_divisor: global_batch
	max_grad_norm_ratio: null
	precision: amp_bf16
	wandb:
	project: whirl-molmoflow-rby1
	entity: prior-ai2
	group: null
	name: molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
	tags:
	- watching
	log_artifacts: false
	rank_zero_only: true
	log_interval: 20
	allow_resume: true
	finish_on_sigterm: true
	beaker_log_interval: 50
	speed_monitor:
	window_size: 20
	gpu_flops_available: null
	console_log_interval: 20
	enable_timing_logs: false
	gen1_gc_interval: 1
	compile:
	mode: default
	fullgraph: false
	dynamic: false
	backend: inductor
	activation_checkpointing: true
	fsdp:
	fsdp2: true
	precision: pure
	use_orig_params: true
	wrapping_strategy: null
	sharding_strategy: FULL_SHARD
	hybrid_sharding_num_model_replicas: null
	softmax_auxiliary_loss: false
	softmax_auxiliary_loss_scale: 0.0001
	response_logits_only: true
	saliency_score_loss_wt: null
	frame_score_loss_wt: null
	frame_score_loss_type: mse
	frame_score_loss_target: 0.7
	time_limit: null
	extra_steps_after_cancel: 0
	python_profiling: false
	torch_profiling: false
	stop_at: 100000
	stop_after: null
	fused_loss: false
	compile_loss: true
	runtime_data:
	args: launch_scripts/train_synthmanip.py /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/
	--data_paths /weka/prior/datasets/robomolmo/feb12_franka_and_rby1/DoorOpeningDataGenConfig
	/weka/prior/datasets/robomolmo/feb15_franka_and_rby1/DoorOpeningDataGenConfig
	--no_val --dataset_sample_rates 1.0 1.0 --stats_path=/weka/prior/datasets/robomolmo/rby1_multitask_norm_stats.yaml
	--action_preset RBY1_multitask --camera_preset RBY1_full_with_head_gopro --wandb.name=molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
	--wandb.entity=prior-ai2 --wandb.project=whirl-molmoflow-rby1 --seq_len=1024 --max_duration=100000
	--device_batch_size=8 --global_batch_size=1024 --log_interval=20 --model.mm_preprocessor.use_frame_special_tokens=True
	--model.mm_preprocessor.max_subtitle_tokens=null --data.num_workers=4 --prefetch_factor=4
	--save_interval=4000 --save_num_checkpoints_to_keep=3 --checkpoint_retention_frequency=10000
	--save_folder=/weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
	--exp_name=molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5
	--data.packing=null --model.mm_preprocessor.image.max_images=5 --model.mm_preprocessor.image.crop_mode=resize
	--model.mm_preprocessor.max_frames=1 --model.same_noise_per_time=False --model.num_flow_timestamps=8
	--use_point_prompts --randomize_prompts --point_prompt_camera=head_camera --max_points_in_conditioning_frame=1
	--conditioning_frame=random_first_10 --cameras_to_warp head_camera --img_aug --ft_llm=True
	--scheduler.llm_t_warmup=2000 --optimizer.llm_learning_rate=1e-5
	hostname: jupiter-cs-aus-121.reviz.ai2.in
	date: 03/05/2026, 22:21
	world_size: 128
	resuming_from: null
	beaker_experiment_id: 01KK018HKCWPW1677ZM8GQAYXG
	beaker_experiment_url: https://beaker.org/ex/01KK018HKCWPW1677ZM8GQAYXG
	wandb_id: kg1npwco
	wandb_url: https://wandb.ai/prior-ai2/whirl-molmoflow-rby1/runs/kg1npwco
	distributed_eval_enabled: false
	distributed_eval_benchmark_path: /weka/oe/rohunt/robo-bench/FrankaPickandPlaceDroidBench_5ep_json_benchmark
	distributed_eval_config_cls: launch_scripts.synthvla.configure_mujoco_thor:FrankaState8ClampConfig
	distributed_eval_task_horizon: 300
	distributed_eval_num_worker_jobs: 1
	distributed_eval_wandb_project: mjthor-online-eval
	distributed_eval_workspace: ai2/robo-molmo
	distributed_eval_clusters:
	- ai2/saturn
	- ai2/neptune
	- ai2/rhea
	- ai2/ceres
	distributed_eval_priority: high
	distributed_eval_preemptible: true