oweller2
committed on
Commit
·
efef38a
1
Parent(s):
bfe22ad
update
Browse files
- tokenizer.py +8 -3
- tokenizer_config.json +2 -2
tokenizer.py
CHANGED
@@ -1,10 +1,15 @@
|
|
1 |
-
from transformers import
|
2 |
|
3 |
-
class ModernDecoderBERTTokenizer(
|
4 |
def __init__(self, *args, **kwargs):
|
|
|
5 |
super().__init__(*args, **kwargs)
|
6 |
|
7 |
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
|
|
8 |
if token_ids_1 is None:
|
9 |
return [id for id in token_ids_0 if id != self.eos_token_id]
|
10 |
-
return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
|
|
|
|
|
|
|
|
1 |
+
from transformers import PreTrainedTokenizer
|
2 |
|
3 |
+
class ModernDecoderBERTTokenizer(PreTrainedTokenizer):
|
4 |
def __init__(self, *args, **kwargs):
|
5 |
+
breakpoint()
|
6 |
super().__init__(*args, **kwargs)
|
7 |
|
8 |
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
|
9 |
+
breakpoint()
|
10 |
if token_ids_1 is None:
|
11 |
return [id for id in token_ids_0 if id != self.eos_token_id]
|
12 |
+
return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
|
13 |
+
|
14 |
+
|
15 |
+
AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)
|
tokenizer_config.json
CHANGED
@@ -937,10 +937,10 @@
|
|
937 |
"sep_token": "[SEP]",
|
938 |
"unk_token": "[UNK]",
|
939 |
"eos_token": "[SEP]",
|
940 |
-
"tokenizer_class": "ModernDecoderBERTTokenizer",
|
941 |
"truncation": "right",
|
942 |
"auto_map": {
|
943 |
-
"AutoConfig": "orionweller/test-flex-gpt--
|
944 |
"AutoTokenizer": [
|
945 |
"orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
|
946 |
"orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer"
|
|
|
937 |
"sep_token": "[SEP]",
|
938 |
"unk_token": "[UNK]",
|
939 |
"eos_token": "[SEP]",
|
940 |
+
"tokenizer_class": "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
|
941 |
"truncation": "right",
|
942 |
"auto_map": {
|
943 |
+
"AutoConfig": "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
|
944 |
"AutoTokenizer": [
|
945 |
"orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
|
946 |
"orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer"
|