{ "architectures": [ "MambaForCausalLM" ], "bias": false, "conv_bias": true, "d_conv": 4, "d_inner": 3072, "d_model": 1536, "d_state": 16, "dt_rank": 96, "expand": 2, "initializer_range": 0.02, "model_type": "mamba", "n_layer": 48, "pad_vocab_size_multiple": 8, "torch_dtype": "float32", "transformers_version": "4.35.2", "vocab_size": 50280 }