Update configuration_Llamoe.py
configuration_Llamoe.py  CHANGED  (+9 -10)
@@ -13,25 +13,24 @@ LLAMOE_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 class LlamoeConfig(PretrainedConfig):
     model_type = "Llamoe"
     keys_to_ignore_at_inference = ["past_key_values"]
-
     def __init__(
         self,
         vocab_size=32000,
-        hidden_size=
-        intermediate_size=
-        num_hidden_layers=
-        num_attention_heads=
-        num_key_value_heads=
+        hidden_size=4096,
+        intermediate_size=11008,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=32,
         head_dim=256,
-        hidden_act="
-        max_position_embeddings=
+        hidden_act="silu",
+        max_position_embeddings=4096,
         initializer_range=0.02,
-        rms_norm_eps=1e-
+        rms_norm_eps=1e-05,
         use_cache=True,
         pad_token_id=0,
         eos_token_id=1,
         bos_token_id=2,
-        tie_word_embeddings=
+        tie_word_embeddings=False,
         rope_theta=10000.0,
         attention_bias=False,
         attention_dropout=0.0,
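For context, a minimal usage sketch of the configuration after this change. It assumes LlamoeConfig follows the usual transformers PretrainedConfig pattern (every __init__ argument stored as an attribute, and defaults for the parameters not visible in this hunk), and that the module is importable as configuration_Llamoe; none of that is confirmed by the diff itself.

# Minimal sketch, not part of the commit. Assumes configuration_Llamoe is on the
# Python path and that LlamoeConfig stores each __init__ argument as an attribute,
# as transformers.PretrainedConfig subclasses conventionally do.
from configuration_Llamoe import LlamoeConfig

# Default construction picks up the values introduced by this commit.
config = LlamoeConfig()
print(config.hidden_size)        # 4096
print(config.intermediate_size)  # 11008
print(config.num_hidden_layers)  # 32
print(config.hidden_act)         # silu
print(config.rms_norm_eps)       # 1e-05

# Any default can still be overridden per instance, e.g. a small debug-sized
# variant (hypothetical values, not taken from the diff).
debug_config = LlamoeConfig(
    hidden_size=512,
    intermediate_size=1376,
    num_hidden_layers=4,
    num_attention_heads=8,
    num_key_value_heads=8,
)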