""" ## reference https://github.com/xai-org/grok-1/blob/main/run.py vocab_size=128 * 1024, pad_token=0, eos_token=2, """ import os import sentencepiece from tokenizer import sptokenizer_patch CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) MODEL_FILE = os.path.join(CURRENT_DIR, "tokenizer.model") tokenizer = sentencepiece.SentencePieceProcessor(model_file=MODEL_FILE) # print(tokenizer.decode([1,2,3], skip_special_tokens=True))