tokenizer-arena/vocab/gpt_nexo_20b/test_special_token.py
"""
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
# tokenizer = AutoTokenizer.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
# Encode a mixed English/Chinese test string.
tokens = tokenizer.encode("good night\n中国 ss一个人去哪里")
print(tokenizer.pad_token)  # prints None unless a pad token has been added to this tokenizer
# tokenizer.
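
# A minimal sketch, not part of the original script: a few extra checks one might
# run here, assuming only the standard transformers tokenizer API. It prints the
# remaining special-token attributes and round-trips the encoded test string.
print(tokenizer.special_tokens_map)                 # mapping of named special tokens
print(tokenizer.all_special_tokens)                 # flat list of all special tokens
print(tokenizer.eos_token, tokenizer.eos_token_id)  # end-of-text token and its id
print(tokens)
print(tokenizer.decode(tokens))                     # decode back to text as a sanity check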