Commit: set `tokenizer_class` on the GeoV configuration.
Changed file: configuration_geov.py (+17 −17)
@@ -13,11 +13,10 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
""" GeoV model configuration"""
|
16 |
-
|
17 |
from transformers.configuration_utils import PretrainedConfig
|
18 |
from transformers.utils import logging
|
19 |
|
20 |
-
|
21 |
logger = logging.get_logger(__name__)
|
22 |
|
23 |
GEOV_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
@@ -75,23 +74,24 @@ class GeoVConfig(PretrainedConfig):
|
|
75 |
>>> configuration = model.config # doctest: +SKIP
|
76 |
```"""
|
77 |
model_type = "geov"
|
|
|
78 |
|
79 |
def __init__(
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
):
|
96 |
super().__init__(
|
97 |
bos_token_id=bos_token_id, eos_token_id=eos_token_id, tie_word_embeddings=tie_word_embeddings, **kwargs
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
""" GeoV model configuration"""
|
16 |
+
import geov.tokenization_geov
|
17 |
from transformers.configuration_utils import PretrainedConfig
|
18 |
from transformers.utils import logging
|
19 |
|
|
|
20 |
logger = logging.get_logger(__name__)
|
21 |
|
22 |
GEOV_PRETRAINED_CONFIG_ARCHIVE_MAP = {
|
|
|
74 |
>>> configuration = model.config # doctest: +SKIP
|
75 |
```"""
|
76 |
model_type = "geov"
|
77 |
+
tokenizer_class = geov.tokenization_geov.GeoVTokenizer
|
78 |
|
79 |
def __init__(
|
80 |
+
self,
|
81 |
+
vocab_size=65_536,
|
82 |
+
hidden_size=5_120,
|
83 |
+
num_hidden_layers=32,
|
84 |
+
num_attention_heads=40,
|
85 |
+
intermediate_size=1024 * 5 * 4,
|
86 |
+
layer_norm_eps=1e-4,
|
87 |
+
rotary_emb_base=10000,
|
88 |
+
max_position_embeddings=2048,
|
89 |
+
use_extra_biases_ffn=False,
|
90 |
+
use_cache=True,
|
91 |
+
bos_token_id=0,
|
92 |
+
eos_token_id=2,
|
93 |
+
tie_word_embeddings=False,
|
94 |
+
**kwargs,
|
95 |
):
|
96 |
super().__init__(
|
97 |
bos_token_id=bos_token_id, eos_token_id=eos_token_id, tie_word_embeddings=tie_word_embeddings, **kwargs
|