Update configuration_aquilamoe.py
configuration_aquilamoe.py (CHANGED)
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ AquilaMoE model configuration"""
+# Copied from transformers.models.mixtral.configuration_mixtral

 from transformers import PretrainedConfig
 from transformers.utils import logging
@@ -23,7 +24,7 @@ logger = logging.get_logger(__name__)
 class AquilaMoeConfig(PretrainedConfig):
     r"""
     Args:
-        vocab_size (`int`, *optional*, defaults to
+        vocab_size (`int`, *optional*, defaults to 150000):
             Vocabulary size of the AquilaMoE model. Defines the number of different tokens that can be represented by the
             `inputs_ids` passed when calling [`AquilaMoE`]
         hidden_size (`int`, *optional*, defaults to 4096):
@@ -85,7 +86,7 @@ class AquilaMoeConfig(PretrainedConfig):

     def __init__(
         self,
-        vocab_size=
+        vocab_size=150000,
         hidden_size=4096,
         intermediate_size=14336,
         num_hidden_layers=32,