""" https://github.com/EleutherAI/gpt-neox/blob/main/tools/corpora.py ## """ from transformers import AutoTokenizer, AutoModelForCausalLM # tokenizer = AutoTokenizer.from_pretrained("togethercomputer/GPT-NeoXT-Chat-Base-20B") tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b") tokens = tokenizer.encode("good night\n中国 ss一个人去哪里") print(tokens) print(tokenizer.decode(tokens)) for token in tokens: print(token, tokenizer.decode([token]))