import os

from transformers import GPT2Tokenizer

from vocab import TokenizerType, TokenizerImpl

# Load a local copy of the tokenizer instead of downloading from the Hub:
# CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
# TOKENIZER_DIR = os.path.join(CURRENT_DIR, "tokenizer")
# tokenizer = GPT2Tokenizer.from_pretrained(TOKENIZER_DIR)

# Load the GPT-2 byte-level BPE tokenizer from the Hugging Face Hub.
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")

# tokenizer.type = TokenizerType.

# Source: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/tokenization_gpt2.py
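
# A minimal usage sketch (not part of the original script): encode a sample
# string with the loaded tokenizer, inspect the byte-level BPE pieces, and
# decode the ids back to text. The sample text is chosen for illustration only.
if __name__ == "__main__":
    text = "Hello, world!"
    token_ids = tokenizer.encode(text)                    # list of token ids
    tokens = tokenizer.convert_ids_to_tokens(token_ids)   # byte-level BPE pieces
    print(token_ids)
    print(tokens)
    print(tokenizer.decode(token_ids))                    # round-trips to the text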