| run_name: molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5 |
| model: |
| model_name: molmoact |
| data_formatter: |
| prompt_templates: uber_model_v2 |
| message_format: qwen3 |
| system_prompt: demo_or_style_v2 |
| always_start_with_space: false |
| default_inference_len: 65 |
| select_answer: best |
| debug: false |
| image_last: false |
| format_message_list: null |
| p_one_message: 0.0 |
| eval_system_prompt_mapping: null |
| p_choice_content_in_mc: 1.0 |
| template_video_mc_questions: true |
| pointing_format: html-v2 |
| points_decimal_places: 1 |
| use_seperate_non_pointing_qa_style: false |
| timestamp_mode: 50-percent-seconds |
| output_timestamp_mode: seconds |
| seconds_decimal_places: 1 |
| p_multi_point_all_image: 0.5 |
| use_seperate_count_without_pointing_style: false |
| sample_random_initial_point: true |
| llm: |
| d_model: 2560 |
| n_heads: 32 |
| n_kv_heads: 8 |
| head_dim: 128 |
| qkv_bias: false |
| clip_qkv: null |
| n_layers: 36 |
| mlp_ratio: 4 |
| mlp_hidden_size: 19456 |
| activation_type: swiglu |
| block_type: sequential |
| rope: true |
| rope_full_precision: true |
| rope_theta: 5000000.0 |
| rope_type: default |
| rope_factor: null |
| rope_high_freq_factor: null |
| rope_low_freq_factor: null |
| rope_original_max_position_embeddings: null |
| rope_attention_factor: null |
| rope_beta_fast: null |
| rope_beta_slow: null |
| rope_mscale: null |
| rope_mscale_all_dim: null |
| rope_truncate: null |
| attention_type: sdpa |
| full_attention_layers: null |
| sliding_attention_rope_scaling: false |
| float32_attention: true |
| attention_dropout: 0.0 |
| attention_layer_norm: true |
| attention_layer_norm_type: qwen3 |
| residual_dropout: 0.1 |
| response_residual_dropout: 0.0 |
| layer_norm_type: rms |
| layer_norm_with_affine: true |
| layer_norm_eps: 1.0e-06 |
| attention_layer_norm_with_affine: true |
| max_sequence_length: 8192 |
| max_position_embeddings: null |
| include_bias: false |
| bias_for_layer_norm: null |
| norm_after: false |
| moe_num_experts: 8 |
| moe_top_k: 2 |
| moe_mlp_impl: sparse |
| moe_log_expert_assignment: false |
| moe_shared_expert: false |
| moe_lbl_in_fp32: false |
| moe_interleave: false |
| moe_loss_weight: 0.1 |
| moe_zloss_weight: null |
| moe_dropless: true |
| moe_capacity_factor: 1.25 |
| embedding_dropout: 0.0 |
| scale_logits: false |
| vocab_size: 151936 |
| additional_vocab_size: 128 |
| weight_tying: true |
| embedding_size: 151936 |
| use_position_ids: true |
| tokenizer: |
| identifier: Qwen/Qwen3-4B-Instruct-2507 |
| tokenizer_dir: null |
| init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-4b-instruct.pt |
| init_incremental: null |
| new_embedding_init_range: 0.02 |
| initializer_range: 0.02 |
| normalize_input_embeds: false |
| activation_checkpoint: whole_layer |
| compile: blocks |
| fix_pad_tokenizer: false |
| init_std: 0.02 |
| init_fn: normal |
| init_cutoff_factor: null |
| vision_backbone: |
| vit: |
| image_model_type: siglip |
| image_default_input_size: |
| - 378 |
| - 378 |
| image_patch_size: 14 |
| image_pos_patch_size: 14 |
| image_emb_dim: 1152 |
| image_num_heads: 16 |
| image_num_key_value_heads: 16 |
| image_num_layers: 27 |
| image_head_dim: 72 |
| image_mlp_dim: 4304 |
| image_mlp_activations: gelu_pytorch_tanh |
| image_dropout_rate: 0.0 |
| image_num_pos: 729 |
| image_norm_eps: 1.0e-06 |
| attention_dropout: 0.0 |
| residual_dropout: 0.0 |
| initializer_range: 0.02 |
| float32_attention: true |
| attention_type: sdpa |
| sdpa_backend: all |
| activation_checkpointing: true |
| init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt |
| resize_mode: siglip |
| pad_value: 0.0 |
| normalize: siglip |
| image_pooling_2d: attention_meanq |
| pooling_attention_mask: true |
| image_projector: mlp |
| image_padding_embed: null |
| vit_layers: |
| - -3 |
| - -9 |
| skip_unused_layers: true |
| use_deepstack: false |
| share_connector: false |
| image_feature_dropout: 0.0 |
| connector_activation_checkpointing: true |
| compile_vit: blocks |
| pool_size_embeds: null |
| compile_connector: null |
| normalize_on_gpu: true |
| use_image_augmentation: true |
| use_resize_bottleneck: false |
| mm_preprocessor: |
| max_answer_len: null |
| last_message_loss_only: false |
| max_text_tokens: null |
| loss_token_weighting: root_subsegments_root_tokens |
| max_frames: 1 |
| frame_sample_mode: uniform_last_frame |
| candidate_sampling_fps: |
| - 0.25 |
| - 0.5 |
| - 1.0 |
| - 2.0 |
| - 4.0 |
| - 6.0 |
| - 8.0 |
| - 16.0 |
| cache_videos: true |
| loading_method: torchcodec_exact |
| max_fps: |
| - 2.0 |
| time_sampling: true |
| time_mode: per-frame-compact |
| subtitle_mode: frame_1 |
| max_crops: 1 |
| overlap_margins: |
| - 4.0 |
| - 4.0 |
| use_col_tokens: false |
| periodic_high_res_frame: null |
| high_low_train_mode: local_rnd |
| high_res_frame_sample_options: null |
| periodic_sample_rate_training: |
| 4: |
| - 0.9 |
| - 0.03 |
| - 0.03 |
| - 0.04 |
| 3: |
| - 0.6 |
| - 0.2 |
| - 0.2 |
| skip_low_res_in_high_low: false |
| pooling_w: 3 |
| pooling_h: 3 |
| high_res_pooling_w: null |
| high_res_pooling_h: null |
| query_based_resolution_selection: false |
| max_queries_for_resolution_selection: 8 |
| use_frame_special_tokens: true |
| frame_sel_clip_identifier: google/siglip2-so400m-patch14-384 |
| image_padding_mask: false |
| max_subtitle_tokens: null |
| image: |
| crop_mode: resize |
| use_col_tokens: true |
| max_crops: 8 |
| high_res_max_crops: 24 |
| p_high_res: 0.0 |
| pooling_w: 2 |
| pooling_h: 2 |
| overlap_margins: |
| - 4 |
| - 4 |
| max_images: 5 |
| max_multi_image_crops: 8 |
| multi_image_pooling_w: 2 |
| multi_image_pooling_h: 2 |
| use_single_crop_col_tokens: false |
| use_single_crop_start_token: true |
| topk: null |
| prune_from_frame: 0 |
| bi_directional_attn: image_tokens |
| shared_low_high_embedding: true |
| debug: null |
| cp_enabled: false |
| apply_cp_to_vision_backbone: false |
| action_dim: 20 |
| action_horizon: 16 |
| n_action_steps: 8 |
| n_obs_steps: 1 |
| action_expert: |
| max_horizon: 32 |
| action_dim: 20 |
| hidden_size: 768 |
| num_layers: 36 |
| num_heads: 8 |
| mlp_ratio: 4.0 |
| timestep_embed_dim: 256 |
| dropout: 0.0 |
| attn_dropout: 0.0 |
| context_layer_norm: true |
| action_expert_layer_mode: per_layer |
| flow_matching_num_steps: 10 |
| flow_matching_cutoff: 0.999 |
| flow_matching_beta_alpha: 1.0 |
| flow_matching_beta_beta: 1.5 |
| num_flow_timestamps: 8 |
| same_noise_per_time: false |
| robot_preprocessor: |
| stats_by_repo: |
| synthmanip: |
| observation.state: |
| min: |
| - -4.904874324798584 |
| - -4.564780235290527 |
| - -3.5160739421844482 |
| - -2.356419563293457 |
| - -0.47234979271888733 |
| - -2.0865397453308105 |
| - -3.343071222305298 |
| - -5.8824052810668945 |
| - -1.7488995790481567 |
| - -2.967109203338623 |
| - -0.11299018561840057 |
| - -2.3546268939971924 |
| - -3.1416664123535156 |
| - -2.0946199893951416 |
| - -3.2890703678131104 |
| - -6.282893657684326 |
| - -1.7483078241348267 |
| - -2.967064142227173 |
| - -0.12049419432878494 |
| - -1.778153419494629 |
| - -1.7587945461273193 |
| - -1.5871200561523438 |
| max: |
| - 17.08185577392578 |
| - 33.73189163208008 |
| - 3.2411913871765137 |
| - 2.356658697128296 |
| - 3.1416971683502197 |
| - 2.1008245944976807 |
| - 0.07229717075824738 |
| - 6.270575523376465 |
| - 2.0102994441986084 |
| - 2.9668161869049072 |
| - 0.021467044949531555 |
| - 2.3977394104003906 |
| - 0.34489157795906067 |
| - 2.0900635719299316 |
| - 0.07242166996002197 |
| - 6.27663516998291 |
| - 2.0076160430908203 |
| - 2.9636759757995605 |
| - 0.04509617015719414 |
| - 0.919683575630188 |
| - 1.6717331409454346 |
| - 1.1039749383926392 |
| action: |
| q01: |
| - -0.04400388523936272 |
| - -0.044572047889232635 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.037506889551877975 |
| - -0.03562070056796074 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.04800133779644966 |
| - -0.05000000074505806 |
| - -100.0 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.04927435144782066 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.0456085205078125 |
| - -0.05000000074505806 |
| - -100.0 |
| - -0.025820335373282433 |
| q99: |
| - 0.04579437896609306 |
| - 0.04565873369574547 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.03847877308726311 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 100.0 |
| - 0.05000000074505806 |
| - 0.03608553484082222 |
| - 0.04896605759859085 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 100.0 |
| - 0.7379999756813049 |
| default_repo_id: synthmanip |
| action_key: action |
| state_keys: |
| - observation.state |
| action_norm_mode: quantiles |
| state_norm_mode: min_max |
| robot_postprocessor: |
| stats_by_repo: |
| synthmanip: |
| observation.state: |
| min: |
| - -4.904874324798584 |
| - -4.564780235290527 |
| - -3.5160739421844482 |
| - -2.356419563293457 |
| - -0.47234979271888733 |
| - -2.0865397453308105 |
| - -3.343071222305298 |
| - -5.8824052810668945 |
| - -1.7488995790481567 |
| - -2.967109203338623 |
| - -0.11299018561840057 |
| - -2.3546268939971924 |
| - -3.1416664123535156 |
| - -2.0946199893951416 |
| - -3.2890703678131104 |
| - -6.282893657684326 |
| - -1.7483078241348267 |
| - -2.967064142227173 |
| - -0.12049419432878494 |
| - -1.778153419494629 |
| - -1.7587945461273193 |
| - -1.5871200561523438 |
| max: |
| - 17.08185577392578 |
| - 33.73189163208008 |
| - 3.2411913871765137 |
| - 2.356658697128296 |
| - 3.1416971683502197 |
| - 2.1008245944976807 |
| - 0.07229717075824738 |
| - 6.270575523376465 |
| - 2.0102994441986084 |
| - 2.9668161869049072 |
| - 0.021467044949531555 |
| - 2.3977394104003906 |
| - 0.34489157795906067 |
| - 2.0900635719299316 |
| - 0.07242166996002197 |
| - 6.27663516998291 |
| - 2.0076160430908203 |
| - 2.9636759757995605 |
| - 0.04509617015719414 |
| - 0.919683575630188 |
| - 1.6717331409454346 |
| - 1.1039749383926392 |
| action: |
| q01: |
| - -0.04400388523936272 |
| - -0.044572047889232635 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.037506889551877975 |
| - -0.03562070056796074 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.04800133779644966 |
| - -0.05000000074505806 |
| - -100.0 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.04927435144782066 |
| - -0.05000000074505806 |
| - -0.05000000074505806 |
| - -0.0456085205078125 |
| - -0.05000000074505806 |
| - -100.0 |
| - -0.025820335373282433 |
| q99: |
| - 0.04579437896609306 |
| - 0.04565873369574547 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.03847877308726311 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 100.0 |
| - 0.05000000074505806 |
| - 0.03608553484082222 |
| - 0.04896605759859085 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 0.05000000074505806 |
| - 100.0 |
| - 0.7379999756813049 |
| default_repo_id: synthmanip |
| action_key: action |
| state_keys: |
| - observation.state |
| action_norm_mode: quantiles |
| state_norm_mode: min_max |
| parallelism: |
| data_parallel_replicate_degree: 1 |
| enable_compiled_autograd: false |
| data_parallel_shard_degree: -1 |
| fsdp_reshard_after_forward: default |
| context_parallel_config: |
| degree: 1 |
| attention_type: ulysses |
| load_balancer: ulysses |
| head_stride: 1 |
| tensor_parallel_config: |
| degree: 1 |
| enable_async: false |
| data_parallel_config: |
| name: fsdp |
| param_dtype: null |
| reduce_dtype: float32 |
| num_replicas: null |
| shard_degree: null |
| wrapping_strategy: full |
| prefetch_factor: 0 |
| context_parallel_rotate_method: allgather |
| seed: 6198 |
| epoch: null |
| dry_run: false |
| ft_llm: true |
| ft_vit: false |
| ft_connector: false |
| ft_embedding: lm_head |
| optimizer: |
| name: adamw |
| learning_rate: 0.0001 |
| weight_decay: 0.01 |
| betas: |
| - 0.9 |
| - 0.95 |
| eps: 1.0e-05 |
| connector_learning_rate: 5.0e-06 |
| vit_learning_rate: 5.0e-06 |
| llm_learning_rate: 1.0e-05 |
| frame_selector_learning_rate: 0.0001 |
| temporal_token_scorer_learning_rate: 0.0001 |
| action_expert_learning_rate: 0.0001 |
| connector_weight_decay: 0.0 |
| vit_weight_decay: 0.0 |
| llm_weight_decay: 0.0 |
| frame_selector_weight_decay: 0.01 |
| temporal_token_scorer_weight_decay: 0.01 |
| action_expert_weight_decay: 0.0 |
| connector_betas: |
| - 0.9 |
| - 0.95 |
| vit_betas: |
| - 0.9 |
| - 0.95 |
| llm_betas: |
| - 0.9 |
| - 0.95 |
| frame_selector_betas: |
| - 0.9 |
| - 0.95 |
| temporal_token_scorer_betas: |
| - 0.9 |
| - 0.95 |
| action_expert_betas: |
| - 0.9 |
| - 0.95 |
| connector_eps: 1.0e-06 |
| vit_eps: 1.0e-06 |
| llm_eps: 1.0e-06 |
| frame_selector_eps: 1.0e-06 |
| temporal_token_scorer_eps: 1.0e-06 |
| action_expert_eps: 1.0e-06 |
| metrics_log_interval: -1 |
| scheduler: |
| name: multimodal |
| units: steps |
| t_warmup: 100 |
| t_max: null |
| alpha_f: 0.1 |
| connector_t_warmup: 200 |
| vit_t_warmup: 200 |
| llm_t_warmup: 2000 |
| frame_selector_t_warmup: 200 |
| temporal_token_scorer_t_warmup: 200 |
| action_expert_t_warmup: 200 |
| grad_clip_warmup_steps: null |
| grad_clip_warmup_factor: null |
| warmup_min_lr: 0.0 |
| data: |
| dataset: null |
| mixture: |
| synthmanip/task_0: 1.0 |
| synthmanip/task_1: 1.0 |
| root_size_mixture: null |
| kwargs_mixture: null |
| split: train |
| seed: 50189 |
| pad: to_max |
| sequence_length: 1024 |
| max_text_seq_len: null |
| shuffle: true |
| start_index: 0 |
| packing: null |
| enable_variable_sized_token_pooling: true |
| num_workers: 4 |
| drop_last: true |
| pin_memory: true |
| prefetch_factor: 4 |
| persistent_workers: false |
| timeout: 300 |
| action_data: null |
| action_loader_rate: null |
| action_batch_interval: 1 |
| restore_dataloader: true |
| fast_forward_batches: null |
| evaluators: [] |
| eval_interval: 0 |
| inf_evaluators: [] |
| inf_eval_interval: 1000 |
| eval_on_last_step: true |
| eval_on_load: false |
| eval_on: [] |
| save_folder: /weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5 |
| checkpointer_config: |
| save_thread_count: null |
| load_thread_count: null |
| pre_download: false |
| work_dir: null |
| throttle_uploads: false |
| canceled_check_interval: 50 |
| save_interval: 4000 |
| save_at: null |
| save_final_optim: false |
| save_num_checkpoints_to_keep: 3 |
| checkpoint_retention_frequency: 10000 |
| save_final_unsharded_checkpoint: false |
| save_interval_ephemeral: null |
| save_overwrite: true |
| load_path: null |
| reset_optimizer_state: false |
| reset_trainer_state: false |
| initial_model_checkpoint: /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/ |
| allow_resume: true |
| max_duration: 100000 |
| global_train_batch_size: 1024 |
| device_train_microbatch_size: 8 |
| max_grad_norm: 1.0 |
| multi_component_grad_norm: true |
| batch_divisor: global_batch |
| max_grad_norm_ratio: null |
| precision: amp_bf16 |
| wandb: |
| project: whirl-molmoflow-rby1 |
| entity: prior-ai2 |
| group: null |
| name: molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5 |
| tags: |
| - watching |
| log_artifacts: false |
| rank_zero_only: true |
| log_interval: 20 |
| allow_resume: true |
| finish_on_sigterm: true |
| beaker_log_interval: 50 |
| speed_monitor: |
| window_size: 20 |
| gpu_flops_available: null |
| console_log_interval: 20 |
| enable_timing_logs: false |
| gen1_gc_interval: 1 |
| compile: |
| mode: default |
| fullgraph: false |
| dynamic: false |
| backend: inductor |
| activation_checkpointing: true |
| fsdp: |
| fsdp2: true |
| precision: pure |
| use_orig_params: true |
| wrapping_strategy: null |
| sharding_strategy: FULL_SHARD |
| hybrid_sharding_num_model_replicas: null |
| softmax_auxiliary_loss: false |
| softmax_auxiliary_loss_scale: 0.0001 |
| response_logits_only: true |
| saliency_score_loss_wt: null |
| frame_score_loss_wt: null |
| frame_score_loss_type: mse |
| frame_score_loss_target: 0.7 |
| time_limit: null |
| extra_steps_after_cancel: 0 |
| python_profiling: false |
| torch_profiling: false |
| stop_at: 100000 |
| stop_after: null |
| fused_loss: false |
| compile_loss: true |
| runtime_data: |
| args: launch_scripts/train_synthmanip.py /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/ |
| --data_paths /weka/prior/datasets/robomolmo/feb12_franka_and_rby1/DoorOpeningDataGenConfig |
| /weka/prior/datasets/robomolmo/feb15_franka_and_rby1/DoorOpeningDataGenConfig |
| --no_val --dataset_sample_rates 1.0 1.0 --stats_path=/weka/prior/datasets/robomolmo/rby1_multitask_norm_stats.yaml |
| --action_preset RBY1_multitask --camera_preset RBY1_full_with_head_gopro --wandb.name=molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5 |
| --wandb.entity=prior-ai2 --wandb.project=whirl-molmoflow-rby1 --seq_len=1024 --max_duration=100000 |
| --device_batch_size=8 --global_batch_size=1024 --log_interval=20 --model.mm_preprocessor.use_frame_special_tokens=True |
| --model.mm_preprocessor.max_subtitle_tokens=null --data.num_workers=4 --prefetch_factor=4 |
| --save_interval=4000 --save_num_checkpoints_to_keep=3 --checkpoint_retention_frequency=10000 |
| --save_folder=/weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5 |
| --exp_name=molmoflow-rby1-door-expert-16node-03-05-23-17-56_bs_1024_dbs_8_step_100000_llmlr_1e-5 |
| --data.packing=null --model.mm_preprocessor.image.max_images=5 --model.mm_preprocessor.image.crop_mode=resize |
| --model.mm_preprocessor.max_frames=1 --model.same_noise_per_time=False --model.num_flow_timestamps=8 |
| --use_point_prompts --randomize_prompts --point_prompt_camera=head_camera --max_points_in_conditioning_frame=1 |
| --conditioning_frame=random_first_10 --cameras_to_warp head_camera --img_aug --ft_llm=True |
| --scheduler.llm_t_warmup=2000 --optimizer.llm_learning_rate=1e-5 |
| hostname: jupiter-cs-aus-121.reviz.ai2.in |
| date: 03/05/2026, 22:21 |
| world_size: 128 |
| resuming_from: null |
| beaker_experiment_id: 01KK018HKCWPW1677ZM8GQAYXG |
| beaker_experiment_url: https://beaker.org/ex/01KK018HKCWPW1677ZM8GQAYXG |
| wandb_id: kg1npwco |
| wandb_url: https://wandb.ai/prior-ai2/whirl-molmoflow-rby1/runs/kg1npwco |
| distributed_eval_enabled: false |
| distributed_eval_benchmark_path: /weka/oe/rohunt/robo-bench/FrankaPickandPlaceDroidBench_5ep_json_benchmark |
| distributed_eval_config_cls: launch_scripts.synthvla.configure_mujoco_thor:FrankaState8ClampConfig |
| distributed_eval_task_horizon: 300 |
| distributed_eval_num_worker_jobs: 1 |
| distributed_eval_wandb_project: mjthor-online-eval |
| distributed_eval_workspace: ai2/robo-molmo |
| distributed_eval_clusters: |
| - ai2/saturn |
| - ai2/neptune |
| - ai2/rhea |
| - ai2/ceres |
| distributed_eval_priority: high |
| distributed_eval_preemptible: true |
|
|