Rename configuration_clex.py to configuration_llama_clex.py
configuration_clex.py → configuration_llama_clex.py (RENAMED)
@@ -34,11 +34,8 @@ class CLEXLlamaConfig(LlamaConfig):
     This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
     model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
     defaults will yield a similar configuration to that of the LLaMA-7B.
-
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
-
-
     Args:
         vocab_size (`int`, *optional*, defaults to 32000):
             Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
@@ -86,18 +83,13 @@ class CLEXLlamaConfig(LlamaConfig):
         these scaling strategies behave:
         https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
         experimental feature, subject to breaking API changes in future versions.
-
     Example:
-
     ```python
     >>> from transformers import LlamaModel, LlamaConfig
-
     >>> # Initializing a LLaMA llama-7b style configuration
     >>> configuration = LlamaConfig()
-
     >>> # Initializing a model from the llama-7b style configuration
     >>> model = LlamaModel(configuration)
-
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
@@ -118,7 +110,6 @@ class CLEXLlamaConfig(LlamaConfig):
         self.log_scale = log_scale
         self.rope_theta = 10000
         self.max_position_embeddings = 4096
-        self.data_length = 4096
         self.rope_scaling = rope_scaling
         self._rope_scaling_validation()
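Aside from the rename and the docstring whitespace cleanup, the only functional change is dropping `self.data_length = 4096` from `__init__`; `rope_theta` and `max_position_embeddings` stay hard-coded to 10000 and 4096. If this repo is consumed as Hub custom code, the renamed module would normally be picked up through `trust_remote_code` rather than a direct import. A minimal sketch of that path, assuming the repo id below is a placeholder and that the repo's `auto_map` points at `configuration_llama_clex.CLEXLlamaConfig`:

```python
# Sketch only: the repo id is a placeholder and the auto_map wiring is an assumption,
# not something shown in this diff.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "your-org/clex-llama-7b",  # hypothetical repo id
    trust_remote_code=True,    # resolves the Hub-side configuration_llama_clex.py
)

# Per the last hunk, these two fields are fixed in __init__ regardless of saved values.
print(config.rope_theta, config.max_position_embeddings)  # 10000 4096
```

Loading through `AutoConfig` keeps callers insulated from the file rename, since the class is resolved from the repo's metadata rather than from a hard-coded module path.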