| model_name: molmo |
| llm: |
| d_model: 3584 |
| n_heads: 28 |
| n_kv_heads: 4 |
| head_dim: null |
| qkv_bias: true |
| clip_qkv: null |
| n_layers: 28 |
| mlp_ratio: 4 |
| mlp_hidden_size: 37888 |
| activation_type: swiglu |
| block_type: sequential |
| rope: true |
| rope_full_precision: true |
| rope_theta: 1000000.0 |
| rope_type: default |
| rope_factor: null |
| rope_high_freq_factor: null |
| rope_low_freq_factor: null |
| rope_original_max_position_embeddings: null |
| attention_type: sdpa |
| float32_attention: true |
| attention_dropout: 0.0 |
| attention_layer_norm: false |
| attention_layer_norm_type: olmo |
| residual_dropout: 0.1 |
| response_residual_dropout: 0.0 |
| layer_norm_type: rms |
| layer_norm_with_affine: true |
| layer_norm_eps: 1.0e-06 |
| attention_layer_norm_with_affine: true |
| max_sequence_length: 4096 |
| max_position_embeddings: null |
| include_bias: false |
| bias_for_layer_norm: null |
| norm_after: false |
| moe_num_experts: 8 |
| moe_top_k: 2 |
| moe_mlp_impl: sparse |
| moe_log_expert_assignment: false |
| moe_shared_expert: false |
| moe_lbl_in_fp32: false |
| moe_interleave: false |
| moe_loss_weight: 0.1 |
| moe_zloss_weight: null |
| moe_dropless: true |
| moe_capacity_factor: 1.25 |
| embedding_dropout: 0.0 |
| scale_logits: false |
| vocab_size: 152064 |
| additional_vocab_size: 128 |
| weight_tying: false |
| embedding_size: 152064 |
| use_position_ids: true |
| tokenizer: |
| identifier: Qwen/Qwen2.5-7B |
| tokenizer_dir: null |
| depth_tokens: true |
| init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt |
| init_incremental: null |
| new_embedding_init_range: 0.02 |
| initializer_range: 0.02 |
| normalize_input_embeds: false |
| activation_checkpoint: whole_layer |
| compile: blocks |
| fix_pad_tokenizer: false |
| resize_vocab: false |
| init_std: 0.02 |
| init_fn: normal |
| init_cutoff_factor: null |
| vision_backbone: |
| vit: |
| image_model_type: siglip |
| image_default_input_size: |
| - 378 |
| - 378 |
| image_patch_size: 14 |
| image_pos_patch_size: 14 |
| image_emb_dim: 1152 |
| image_num_heads: 16 |
| image_num_key_value_heads: 16 |
| image_num_layers: 27 |
| image_head_dim: 72 |
| image_mlp_dim: 4304 |
| image_mlp_activations: gelu_pytorch_tanh |
| image_dropout_rate: 0.0 |
| image_num_pos: 729 |
| image_norm_eps: 1.0e-06 |
| attention_dropout: 0.0 |
| residual_dropout: 0.0 |
| initializer_range: 0.02 |
| float32_attention: true |
| attention_type: sdpa |
| activation_checkpointing: true |
| init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt |
| resize_mode: siglip |
| pad_value: 0.0 |
| normalize: siglip |
| image_pooling_2d: attention_meanq |
| pooling_attention_mask: false |
| image_projector: mlp |
| image_padding_embed: null |
| vit_layers: |
| - -3 |
| - -9 |
| skip_unused_layers: true |
| image_feature_dropout: 0.0 |
| connector_activation_checkpointing: true |
| compile_vit: blocks |
| data_formatter: |
| prompt_templates: uber_model |
| message_format: role |
| system_prompt: demo_or_style |
| always_start_with_space: false |
| default_inference_len: 65 |
| select_answer: best |
| debug: false |
| image_last: false |
| format_message_list: null |
| p_one_message: 0.0 |
| mm_preprocessor: |
| crop_mode: overlap-and-resize-c2 |
| max_crops: 8 |
| max_images: 2 |
| max_multi_image_crops: 8 |
| pooling_w: 2 |
| pooling_h: 2 |
| overlap_margins: |
| - 4 |
| - 4 |
| use_col_tokens: true |
| loss_token_weighting: root_subsegments |
| legacy_image_mask: false |
| max_answer_len: null |
| img_aug: true |
| bi_directional_attn: null |
| lora_enable: true |
| lora_rank: 32 |
| lora_alpha: 16 |
| lora_dropout: 0.0 |
| lora_bias: none |
| n_action_bins: 256 |
| norm_stats: |
| molmoact: |
| action: |
| mean: |
| - 0.0005706787342205644 |
| - 0.0002448957529850304 |
| - -3.5987635783385485e-05 |
| - 0.00021597897284664214 |
| - -0.0004896928439848125 |
| - -0.000241481073317118 |
| - 0.5570635199546814 |
| std: |
| - 0.005207270849496126 |
| - 0.007506529800593853 |
| - 0.006415561307221651 |
| - 0.013248044066131115 |
| - 0.010928540490567684 |
| - 0.014873150736093521 |
| - 0.49715080857276917 |
| min: |
| - -0.07434078305959702 |
| - -0.07339745759963989 |
| - -0.06539416313171387 |
| - -0.1688285619020462 |
| - -0.10289879888296127 |
| - -0.2667275667190552 |
| - 0.0 |
| max: |
| - 0.06042003631591797 |
| - 0.09417290985584259 |
| - 0.07019275426864624 |
| - 0.2616892158985138 |
| - 0.11751057207584381 |
| - 0.16968433558940887 |
| - 1.0 |
| q01: |
| - -0.01538565568625927 |
| - -0.021047022193670273 |
| - -0.01688069850206375 |
| - -0.044314172118902206 |
| - -0.03890235349535942 |
| - -0.04788423702120781 |
| - 0.0 |
| q99: |
| - 0.014661382883787155 |
| - 0.026515591889619827 |
| - 0.021398313343524933 |
| - 0.04216696694493294 |
| - 0.03401297703385353 |
| - 0.04957397282123566 |
| - 1.0 |
| num_entries: 1560068 |
|
|