mlabonne committed on
Commit
ce810be
1 Parent(s): 7fbaa49

Update configuration_phi.py

Browse files
Files changed (1) hide show
  1. configuration_phi.py +4 -0
configuration_phi.py CHANGED
@@ -27,6 +27,8 @@ class PhiConfig(PretrainedConfig):
27
  n_inner: Optional[int] = None,
28
  n_head: int = 16,
29
  n_head_kv: Optional[int] = None,
 
 
30
  rotary_dim: Optional[int] = 32,
31
  activation_function: Optional[str] = "gelu_new",
32
  flash_attn: bool = False,
@@ -48,6 +50,8 @@ class PhiConfig(PretrainedConfig):
48
  self.n_inner = n_inner
49
  self.n_head = n_head
50
  self.n_head_kv = n_head_kv
 
 
51
  self.rotary_dim = min(rotary_dim, n_embd // n_head)
52
  self.activation_function = activation_function
53
  self.flash_attn = flash_attn
 
27
  n_inner: Optional[int] = None,
28
  n_head: int = 16,
29
  n_head_kv: Optional[int] = None,
30
+ num_experts_per_tok: int = 2,
31
+ num_local_experts: int = 4,
32
  rotary_dim: Optional[int] = 32,
33
  activation_function: Optional[str] = "gelu_new",
34
  flash_attn: bool = False,
 
50
  self.n_inner = n_inner
51
  self.n_head = n_head
52
  self.n_head_kv = n_head_kv
53
+ self.num_experts_per_tok = num_experts_per_tok
54
+ self.num_local_experts = num_local_experts
55
  self.rotary_dim = min(rotary_dim, n_embd // n_head)
56
  self.activation_function = activation_function
57
  self.flash_attn = flash_attn