import os
from transformers import GPT2Tokenizer
from vocab import TokenizerType, TokenizerImpl

# Alternative: load the tokenizer from a local directory next to this file.
# CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
# TOKENIZER_DIR = os.path.join(CURRENT_DIR, "tokenizer")
# tokenizer = GPT2Tokenizer.from_pretrained(TOKENIZER_DIR)

# Load the GPT-2 byte-level BPE tokenizer from the Hugging Face Hub.
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")

# tokenizer.type = TokenizerType.

# Source: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/tokenization_gpt2.py
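
# A minimal usage sketch (not part of the original file): round-trip a sample
# string through the tokenizer. The ids shown are what the standard GPT-2
# vocabulary produces for this input.
if __name__ == "__main__":
    ids = tokenizer.encode("Hello world")
    print(ids)                    # [15496, 995] with the standard GPT-2 vocab
    print(tokenizer.decode(ids))  # "Hello world"
    print(len(tokenizer))         # 50257 tokens in the GPT-2 vocabulary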