mjschock committed on
Commit
f19bbda
1 Parent(s): b71bd08

Upload config

Files changed (2)
  1. config.json +3 -0
  2. configuration_mamba.py +30 -0
config.json CHANGED
@@ -1,4 +1,7 @@
 {
+  "auto_map": {
+    "AutoConfig": "configuration_mamba.MambaConfig"
+  },
   "d_model": 768,
   "fused_add_norm": true,
   "model_type": "mamba",
configuration_mamba.py ADDED
@@ -0,0 +1,30 @@
+import mamba_ssm
+from transformers import PretrainedConfig  # only PretrainedConfig is needed here
+
+mamba_config_defaults = mamba_ssm.models.config_mamba.MambaConfig()  # reuse the mamba_ssm defaults
+
+class MambaConfig(PretrainedConfig):
+    model_type = "mamba"
+
+    def __init__(
+        self,
+        d_model: int = mamba_config_defaults.d_model,
+        fused_add_norm: bool = mamba_config_defaults.fused_add_norm,
+        n_layer: int = mamba_config_defaults.n_layer,
+        pad_vocab_size_multiple: int = mamba_config_defaults.pad_vocab_size_multiple,
+        residual_in_fp32: bool = mamba_config_defaults.residual_in_fp32,
+        rms_norm: bool = mamba_config_defaults.rms_norm,
+        ssm_cfg: dict = mamba_config_defaults.ssm_cfg,
+        vocab_size: int = mamba_config_defaults.vocab_size,
+        **kwargs,
+    ):
+        self.d_model = d_model
+        self.fused_add_norm = fused_add_norm
+        self.n_layer = n_layer
+        self.pad_vocab_size_multiple = pad_vocab_size_multiple
+        self.residual_in_fp32 = residual_in_fp32
+        self.rms_norm = rms_norm
+        self.ssm_cfg = ssm_cfg
+        self.vocab_size = vocab_size
+
+        super().__init__(**kwargs)
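
For reference, a hedged sketch of how a config class like this typically gets its auto_map entry written and its code file pushed alongside config.json, using the standard transformers custom-code workflow (the repository id is a placeholder, not taken from this commit):

from configuration_mamba import MambaConfig

config = MambaConfig(d_model=768)                   # fields not passed fall back to the mamba_ssm defaults
MambaConfig.register_for_auto_class("AutoConfig")   # records the auto_map entry seen in config.json
config.push_to_hub("mjschock/mamba")                # placeholder repo id; uploads config.json and the config code
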