Update tiktoken_tokenizer.py
tiktoken_tokenizer.py  +1 -2
@@ -55,8 +55,7 @@ class BaseTokenizer(PreTrainedTokenizer):
 class TikTokenizer(BaseTokenizer):
     @staticmethod
     def from_pretrained(path, *inputs, **kwargs):
-
-        return TikTokenizer(vocab_file="tokenizer.tiktoken")
+        return TikTokenizer(vocab_file=os.path.join(path, "tokenizer.tiktoken"))
 
     def __init__(self, vocab_file=None):
         pat_str = "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
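For reference, a minimal usage sketch of the new behaviour (assuming tiktoken_tokenizer.py is importable and the checkpoint directory contains a tokenizer.tiktoken file; the directory path below is hypothetical):

from tiktoken_tokenizer import TikTokenizer

# With this change the vocab file is resolved relative to the supplied path
# rather than the current working directory.
tokenizer = TikTokenizer.from_pretrained("/path/to/checkpoint")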