gugarosa committed on
Commit
45f4b21
1 Parent(s): 0254d42

Enables toggling fused_dense, flash_rotary and attn_pdrop in the configuration.

Browse files
configuration_mixformer_sequential.py CHANGED
@@ -30,6 +30,9 @@ class MixFormerSequentialConfig(PretrainedConfig):
30
  n_head_kv: Optional[int] = None,
31
  rotary_dim: Optional[int] = 32,
32
  activation_function: Optional[str] = "gelu_new",
 
 
 
33
  embd_pdrop: Optional[float] = 0.0,
34
  resid_pdrop: Optional[float] = 0.0,
35
  layer_norm_epsilon: Optional[float] = 1e-5,
@@ -47,6 +50,9 @@ class MixFormerSequentialConfig(PretrainedConfig):
47
  self.n_head_kv = n_head_kv
48
  self.rotary_dim = min(rotary_dim, n_embd // n_head)
49
  self.activation_function = activation_function
 
 
 
50
  self.embd_pdrop = embd_pdrop
51
  self.resid_pdrop = resid_pdrop
52
  self.layer_norm_epsilon = layer_norm_epsilon
 
30
  n_head_kv: Optional[int] = None,
31
  rotary_dim: Optional[int] = 32,
32
  activation_function: Optional[str] = "gelu_new",
33
+ flash_rotary: bool = False,
34
+ fused_dense: bool = False,
35
+ attn_pdrop: Optional[float] = 0.0,
36
  embd_pdrop: Optional[float] = 0.0,
37
  resid_pdrop: Optional[float] = 0.0,
38
  layer_norm_epsilon: Optional[float] = 1e-5,
 
50
  self.n_head_kv = n_head_kv
51
  self.rotary_dim = min(rotary_dim, n_embd // n_head)
52
  self.activation_function = activation_function
53
+ self.flash_rotary = flash_rotary
54
+ self.fused_dense = fused_dense
55
+ self.attn_pdrop = attn_pdrop
56
  self.embd_pdrop = embd_pdrop
57
  self.resid_pdrop = resid_pdrop
58
  self.layer_norm_epsilon = layer_norm_epsilon