Q-bert committed on
Commit
c3bb21c
1 Parent(s): 166a7df

Create configuration_mamba.py

Files changed (1)
  1. configuration_mamba.py +43 -0
configuration_mamba.py ADDED
@@ -0,0 +1,43 @@
+ import math
+ from typing import Union
+
+ from transformers import PretrainedConfig
+
+
+ class MambaConfig(PretrainedConfig):
+     model_type = "mamba"
+
+     def __init__(
+         self,
+         vocab_size=50277,
+         d_state=16,
+         d_model=2560,
+         d_conv=4,
+         expand=2,
+         conv_bias=True,
+         bias=False,
+         n_layer=64,
+         dt_rank: Union[int, str] = "auto",
+         pad_vocab_size_multiple=8,
+         initializer_range=0.02,
+         **kwargs,
+     ):
+         self.vocab_size = vocab_size
+         self.n_layer = n_layer
+         self.conv_bias = conv_bias
+         self.expand = expand
+         self.pad_vocab_size_multiple = pad_vocab_size_multiple
+         self.d_conv = d_conv
+         self.d_model = d_model
+         self.d_state = d_state
+         # Inner (expanded) dimension of each Mamba block.
+         self.d_inner = int(self.expand * self.d_model)
+         self.dt_rank = dt_rank
+         self.initializer_range = initializer_range
+         self.bias = bias
+
+         # "auto" derives the delta-projection rank from the model width.
+         if self.dt_rank == "auto":
+             self.dt_rank = math.ceil(self.d_model / 16)
+
+         # Round the vocabulary size up to a multiple of pad_vocab_size_multiple.
+         if self.vocab_size % self.pad_vocab_size_multiple != 0:
+             self.vocab_size += (self.pad_vocab_size_multiple
+                                 - self.vocab_size % self.pad_vocab_size_multiple)
+
+         super().__init__(
+             **kwargs,
+         )
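As a quick sanity check of the derived fields, here is a minimal usage sketch (assuming the transformers package is installed and configuration_mamba.py is importable from the working directory; the default values come straight from the file above):

    from configuration_mamba import MambaConfig

    # Default construction: d_model=2560, expand=2, dt_rank="auto".
    config = MambaConfig()
    print(config.d_inner)     # 5120 = int(2 * 2560)
    print(config.dt_rank)     # 160 = ceil(2560 / 16)
    print(config.vocab_size)  # 50280: 50277 rounded up to a multiple of 8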