{ "_name_or_path": "axial_caduceus_1200", "_target_": "models.configuration_caduceus.AxialCaduceusConfig", "architectures": [ "AxialCaduceusForMaskedLM" ], "auto_map": { "AutoConfig": "configuration_caduceus.AxialCaduceusConfig", "AutoModelForMaskedLM": "modeling_caduceus.AxialCaduceusForMaskedLM" }, "bidirectional": true, "bidirectional_strategy": "add", "bidirectional_weight_tie": true, "complement_map": null, "d_intermediate": 0, "d_model": 128, "fused_add_norm": true, "initializer_cfg": { "initializer_range": 0.02, "n_residuals_per_layer": 1, "rescale_prenorm_residual": true }, "model_name": "axial_caduceus", "model_type": "axial_caduceus", "n_layer": 4, "norm_epsilon": 1e-05, "pad_vocab_size_multiple": 8, "pos_embeddings": "None", "rcps": false, "residual_in_fp32": false, "rms_norm": true, "row_first": true, "ssm_cfg": { "bias": false, "conv_bias": true, "d_conv": 4, "d_state": 16, "dt_init_floor": 0.0001, "dt_max": 0.1, "dt_min": 0.001, "expand": 4 }, "torch_dtype": "float32", "transformers_version": "4.45.2", "use_mamba2": true, "vocab_size": 16 }