Update configuration_aquilamoe.py
configuration_aquilamoe.py (CHANGED)
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ AquilaMoE model configuration"""
+# Copied from transformers.models.mixtral.configuration_mixtral

 from transformers import PretrainedConfig
 from transformers.utils import logging
@@ -23,7 +24,7 @@ logger = logging.get_logger(__name__)
 class AquilaMoeConfig(PretrainedConfig):
     r"""
     Args:
-        vocab_size (`int`, *optional*, defaults to
+        vocab_size (`int`, *optional*, defaults to 150000):
             Vocabulary size of the AquilaMoE model. Defines the number of different tokens that can be represented by the
             `inputs_ids` passed when calling [`AquilaMoE`]
         hidden_size (`int`, *optional*, defaults to 4096):
@@ -85,7 +86,7 @@ class AquilaMoeConfig(PretrainedConfig):

     def __init__(
         self,
-        vocab_size=
+        vocab_size=150000,
         hidden_size=4096,
         intermediate_size=14336,
         num_hidden_layers=32,