damerajee committed
Commit 7fe1774
1 Parent(s): 5193c45

Update configuration_Llamoe.py

Files changed (1)
  configuration_Llamoe.py  +9 -10
configuration_Llamoe.py CHANGED

@@ -13,25 +13,24 @@ LLAMOE_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 class LlamoeConfig(PretrainedConfig):
     model_type = "Llamoe"
     keys_to_ignore_at_inference = ["past_key_values"]
-
     def __init__(
         self,
         vocab_size=32000,
-        hidden_size=3072,
-        intermediate_size=24576,
-        num_hidden_layers=28,
-        num_attention_heads=16,
-        num_key_value_heads=16,
+        hidden_size=4096,
+        intermediate_size=11008,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=32,
         head_dim=256,
-        hidden_act="gelu",
-        max_position_embeddings=8192,
+        hidden_act="silu",
+        max_position_embeddings=4096,
         initializer_range=0.02,
-        rms_norm_eps=1e-6,
+        rms_norm_eps=1e-05,
         use_cache=True,
         pad_token_id=0,
         eos_token_id=1,
         bos_token_id=2,
-        tie_word_embeddings=True,
+        tie_word_embeddings=False,
         rope_theta=10000.0,
         attention_bias=False,
         attention_dropout=0.0,
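
A minimal sketch of how the updated defaults behave, assuming configuration_Llamoe.py from this repo is importable, transformers is installed (PretrainedConfig is its base class), and the unshown __init__ body assigns these arguments to attributes as transformers configs conventionally do:

# Sketch, not part of the commit: exercises the new defaults shown in the diff above.
from configuration_Llamoe import LlamoeConfig

# Instantiating with no arguments picks up the new defaults.
config = LlamoeConfig()
print(config.hidden_size)          # 4096   (was 3072)
print(config.intermediate_size)    # 11008  (was 24576)
print(config.num_hidden_layers)    # 32     (was 28)
print(config.hidden_act)           # "silu" (was "gelu")
print(config.tie_word_embeddings)  # False  (was True)

# Any default can still be overridden per instance:
small = LlamoeConfig(num_hidden_layers=8, max_position_embeddings=2048)

The new defaults follow the Llama-2-7B geometry (4096 hidden size, 11008 intermediate size, 32 layers and 32 heads, SiLU activation, 4096-token context), with input and output embeddings untied.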