# Exposes a module-level ``tokenizer``: the GPT-2 tokenizer loaded through
# ``GPT2Tokenizer.from_pretrained``.
import os

from transformers import GPT2Tokenizer

from vocab import TokenizerType, TokenizerImpl

# Alternative kept for reference: load the tokenizer from a "tokenizer"
# directory located next to this file instead of loading it by model id.
# CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
# TOKENIZER_DIR = os.path.join(CURRENT_DIR, "tokenizer")
# tokenizer = GPT2Tokenizer.from_pretrained(TOKENIZER_DIR)

# Load the tokenizer identified by "openai-community/gpt2". This runs at
# import time, so importing this module triggers the load.
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")

# TODO: the original left this assignment unfinished (no TokenizerType member
# was chosen); pick the member that matches this tokenizer before enabling it.
# tokenizer.type = TokenizerType.

# Source code: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/tokenization_gpt2.py