File size: 6,074 Bytes
1624e76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# coding=utf-8
# Copyright 2022 The OpenBMB Team and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" CpmBee model configuration"""
from typing import List, Optional, Tuple, Union
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
# Module-level logger, named after this module via `__name__`.
logger = logging.get_logger(__name__)
# Maps each published CpmBee checkpoint id to the URL of its hosted `config.json`.
# Every URL follows the same pattern, so the map is derived from the checkpoint ids.
# See all CpmBee models at https://huggingface.co/models?filter=cpmbee
CPMBEE_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    checkpoint: f"https://huggingface.co/{checkpoint}/resolve/main/config.json"
    for checkpoint in (
        "openbmb/cpm-bee-10b",
        "openbmb/cpm-bee-5b",
        "openbmb/cpm-bee-2b",
        "openbmb/cpm-bee-1b",
    )
}
class CpmBeeConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`CpmBeeModel`]. It is used to instantiate a
    CPMBee model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield a similar configuration to that of the CPMBee
    [openbmb/cpm-bee-10b](https://huggingface.co/openbmb/cpm-bee-10b) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 30720):
            Vocabulary size of the CPMBee model. Defines the number of different tokens that can be represented by the
            `input` passed when calling [`CpmBeeModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the encoder layers.
        num_attention_heads (`int`, *optional*, defaults to 64):
            Number of attention heads in the Transformer encoder.
        dim_head (`int`, *optional*, defaults to 64):
            Dimension of attention heads for each attention layer in the Transformer encoder.
        dim_ff (`int`, *optional*, defaults to 10240):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        num_hidden_layers (`int`, *optional*, defaults to 32):
            Number of layers of the Transformer encoder.
        dropout_p (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings and encoder.
        position_bias_num_buckets (`int`, *optional*, defaults to 256):
            The number of position_bias buckets.
        position_bias_num_segment_buckets (`int`, *optional*, defaults to 32):
            The number of segment buckets.
        position_bias_max_distance (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
        eps (`float`, *optional*, defaults to 1e-6):
            The epsilon used by the layer normalization layers.
        init_std (`float`, *optional*, defaults to 1.0):
            Initialize parameters with std = init_std.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether to use cache.
        distance_scale (`float` or `int`, *optional*, defaults to 16):
            Scale the rotary embedding.
        mask_modules (`list` or `tuple`, *optional*, defaults to `None`):
            Decides which feedforward block or attention block is pruned.
        half (`bool`, *optional*, defaults to `False`):
            Whether the model parameters are half-precision.
        kwargs:
            Any additional keyword arguments are forwarded unchanged to [`PretrainedConfig`].

    Example:

    ```python
    >>> from transformers import CpmBeeModel, CpmBeeConfig

    >>> # Initializing a CPMBee cpm-bee-10b style configuration
    >>> configuration = CpmBeeConfig()

    >>> # Initializing a model from the cpm-bee-10b style configuration
    >>> model = CpmBeeModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "cpmbee"

    def __init__(
        self,
        vocab_size: int = 30720,
        hidden_size: int = 4096,
        num_attention_heads: int = 64,
        dim_head: int = 64,
        dim_ff: int = 10240,
        num_hidden_layers: int = 32,
        dropout_p: float = 0.0,
        position_bias_num_buckets: int = 256,
        position_bias_num_segment_buckets: int = 32,
        position_bias_max_distance: int = 2048,
        eps: float = 1e-6,
        init_std: float = 1.0,
        use_cache: bool = True,
        distance_scale: Union[int, float] = 16,
        mask_modules: Optional[Union[List, Tuple]] = None,
        half: bool = False,
        **kwargs,
    ):
        # Let the base class consume the generic options before the
        # CpmBee-specific attributes are stored on the instance.
        super().__init__(**kwargs)

        # Stored verbatim, in signature order; no validation or derivation happens here.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.dim_head = dim_head
        self.dim_ff = dim_ff
        self.num_hidden_layers = num_hidden_layers
        self.dropout_p = dropout_p
        self.position_bias_num_buckets = position_bias_num_buckets
        self.position_bias_num_segment_buckets = position_bias_num_segment_buckets
        self.position_bias_max_distance = position_bias_max_distance
        self.eps = eps
        self.init_std = init_std
        self.use_cache = use_cache
        self.distance_scale = distance_scale
        self.mask_modules = mask_modules
        self.half = half
|