NeoZ123 committed
Commit a084b7a · verified · 1 Parent(s): eacb565

Update tiktoken_tokenizer.py

Files changed (1)
  1. tiktoken_tokenizer.py +1 -2
tiktoken_tokenizer.py CHANGED
@@ -55,8 +55,7 @@ class BaseTokenizer(PreTrainedTokenizer):
 class TikTokenizer(BaseTokenizer):
     @staticmethod
     def from_pretrained(path, *inputs, **kwargs):
-        # return TikTokenizer(vocab_file=os.path.join(path, "tokenizer.tiktoken"))
-        return TikTokenizer(vocab_file="tokenizer.tiktoken")
+        return TikTokenizer(vocab_file=os.path.join(path, "tokenizer.tiktoken"))

     def __init__(self, vocab_file=None):
         pat_str = "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
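For context, a minimal sketch of the path-resolution difference this change makes, assuming a checkpoint directory that contains tokenizer.tiktoken (the directory name below is hypothetical, not part of the commit): before the change, from_pretrained ignored its path argument and looked for the vocab file in the current working directory; after it, the file is resolved inside the given checkpoint directory.

    import os

    # Assumed checkpoint layout: /models/my-model/tokenizer.tiktoken
    checkpoint_dir = "/models/my-model"

    # Before this commit: the filename was passed bare, so it resolved
    # against os.getcwd() and only worked when the process ran from the
    # checkpoint directory.
    old_vocab = "tokenizer.tiktoken"

    # After this commit: the vocab file is joined onto the path argument,
    # so from_pretrained(checkpoint_dir) works from any working directory.
    new_vocab = os.path.join(checkpoint_dir, "tokenizer.tiktoken")

    print(old_vocab)   # tokenizer.tiktoken
    print(new_vocab)   # /models/my-model/tokenizer.tiktoken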