{ "_name_or_path": "mjschock/mamba-130m", "architectures": [ "MambaModelForCausalLM" ], "auto_map": { "AutoConfig": "mjschock/mamba-130m--configuration_mamba.MambaConfig", "AutoModel": "mjschock/mamba-130m--modeling_mamba.MambaModel", "AutoModelForCausalLM": "mjschock/mamba-130m--modeling_mamba.MambaModelForCausalLM" }, "bias": false, "conv_bias": true, "d_conv": 4, "d_inner": 1536, "d_model": 768, "d_state": 16, "dt_rank": 48, "expand": 2, "hidden_size": 768, "initializer_range": 0.02, "model_type": "mamba", "n_layer": 24, "pad_vocab_size_multiple": 8, "torch_dtype": "float32", "transformers_version": "4.37.2", "vocab_size": 50280 }