yczhang committed on
Commit
7260346
·
1 Parent(s): 65efc11

Update: fix bug

Browse files
Files changed (1) hide show
  1. tokenization_interns1.py +2 -1
tokenization_interns1.py CHANGED
@@ -24,6 +24,7 @@ from functools import lru_cache
24
  import regex as re
25
  import sentencepiece as spm
26
 
 
27
  from transformers.tokenization_utils_base import AddedToken, TextInput
28
  from transformers.utils import logging
29
  from packaging import version
@@ -568,7 +569,7 @@ class InternS1Tokenizer(PreTrainedTokenizer):
568
  pad_token=pad_token,
569
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
570
  split_special_tokens=split_special_tokens,
571
- special_tokens_pattern="bos_eos",
572
  **kwargs,
573
  )
574
 
 
24
  import regex as re
25
  import sentencepiece as spm
26
 
27
+ import transformers
28
  from transformers.tokenization_utils_base import AddedToken, TextInput
29
  from transformers.utils import logging
30
  from packaging import version
 
569
  pad_token=pad_token,
570
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
571
  split_special_tokens=split_special_tokens,
572
+ special_tokens_pattern="none",
573
  **kwargs,
574
  )
575