Update configuration_codeshell.py
configuration_codeshell.py  CHANGED  (+17, -4)
@@ -17,7 +17,6 @@
 # its original forms to accommodate minor architectural differences compared to
 # GPTBigCode Configuration that trained the model.
 
-# coding=utf-8
 # Copyright 2023 The BigCode team and HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,7 +30,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+""" Shell configuration"""
 
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
@@ -51,7 +50,7 @@ class CodeShellConfig(PretrainedConfig):
     Args:
         vocab_size (`int`, *optional*, defaults to 50257):
             Vocabulary size of the GPT-2 model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`
+            `inputs_ids` passed when calling [`ShellModel`].
         n_positions (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -86,7 +85,21 @@
             Whether to scale the attention softmax in float32.
         attention_type (`bool`, *optional*, defaults to `True`):
             Whether to use Multi-Query Attion (`True`) or Multi-Head Attention (`False`).
-    """
+    Example:
+
+    ```python
+    >>> from configuration_codeshell import CodeShellConfig
+    >>> from modeling_codeshell import CodeShellForCausalLM
+
+    >>> # Initializing a CodeShell configuration
+    >>> configuration = CodeShellConfig()
+
+    >>> # Initializing a model (with random weights) from the configuration
+    >>> model = CodeShellForCausalLM(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
 
     model_type = "codeshell"
     keys_to_ignore_at_inference = ["past_key_values"]
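Beyond the doctest added in the docstring above, the documented fields (`vocab_size`, `n_positions`, `attention_type`) can also be overridden when building the configuration. The following is a minimal sketch, not part of this change: the field names are taken from the docstring in this diff, while the specific values shown are illustrative assumptions.

```python
# Minimal sketch: overriding fields documented in the CodeShellConfig docstring.
# Field names come from the diff above; the values used here are assumptions.
from configuration_codeshell import CodeShellConfig

config = CodeShellConfig(
    vocab_size=50257,     # number of distinct token ids the model can represent
    n_positions=2048,     # maximum sequence length the model might be used with
    attention_type=True,  # True -> Multi-Query Attention, False -> Multi-Head Attention
)

print(config.vocab_size, config.n_positions, config.attention_type)
```

Since `configuration_codeshell.py` ships with the repository rather than with the `transformers` package, loading the configuration through `AutoConfig.from_pretrained` would typically also require `trust_remote_code=True`.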