oweller2 committed on
Commit
efef38a
·
1 Parent(s): bfe22ad
Files changed (2) hide show
  1. tokenizer.py +8 -3
  2. tokenizer_config.json +2 -2
tokenizer.py CHANGED
@@ -1,10 +1,15 @@
1
- from transformers import PreTrainedTokenizerFast
2
 
3
- class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
4
  def __init__(self, *args, **kwargs):
 
5
  super().__init__(*args, **kwargs)
6
 
7
  def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
 
8
  if token_ids_1 is None:
9
  return [id for id in token_ids_0 if id != self.eos_token_id]
10
- return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
 
 
 
 
1
+ from transformers import PreTrainedTokenizer
2
 
3
+ class ModernDecoderBERTTokenizer(PreTrainedTokenizer):
4
  def __init__(self, *args, **kwargs):
5
+ breakpoint()
6
  super().__init__(*args, **kwargs)
7
 
8
  def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
9
+ breakpoint()
10
  if token_ids_1 is None:
11
  return [id for id in token_ids_0 if id != self.eos_token_id]
12
+ return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
13
+
14
+
15
+ AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)
tokenizer_config.json CHANGED
@@ -937,10 +937,10 @@
937
  "sep_token": "[SEP]",
938
  "unk_token": "[UNK]",
939
  "eos_token": "[SEP]",
940
- "tokenizer_class": "ModernDecoderBERTTokenizer",
941
  "truncation": "right",
942
  "auto_map": {
943
- "AutoConfig": "orionweller/test-flex-gpt--configuration_bert.FlexBertConfig",
944
  "AutoTokenizer": [
945
  "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
946
  "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer"
 
937
  "sep_token": "[SEP]",
938
  "unk_token": "[UNK]",
939
  "eos_token": "[SEP]",
940
+ "tokenizer_class": "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
941
  "truncation": "right",
942
  "auto_map": {
943
+ "AutoConfig": "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
944
  "AutoTokenizer": [
945
  "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer",
946
  "orionweller/test-flex-gpt--tokenizer.ModernDecoderBERTTokenizer"