"""List the pieces of the Baichuan2 SentencePiece model whose text contains "reser".

Loads ``Baichuan2-7B-Chat/tokenizer.model`` into a SentencePiece ``ModelProto``
and prints the index and fields of every matching piece, one piece per line.
"""
import os

# Must be assigned before the protobuf-backed module below is imported;
# the value selects the "python" protobuf implementation.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

from sentencepiece import sentencepiece_model_pb2 as sp_pb2_model
import sentencepiece as spm

baichuan_spm = sp_pb2_model.ModelProto()
# Read the serialized model inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("Baichuan2-7B-Chat/tokenizer.model", "rb") as model_file:
    baichuan_spm.ParseFromString(model_file.read())

# Not used by the loop below; kept as a module-level name in case code
# further down the file relies on it.
vocab_size = len(baichuan_spm.pieces)

for i, piece in enumerate(baichuan_spm.pieces):
    # Substring match on the piece text (presumably targets reserved
    # placeholder tokens -- confirm against the model's vocabulary).
    if "reser" in piece.piece:
        # The protobuf text form spans several lines; flatten it so each
        # matching piece is printed on a single line after its index.
        print(i, str(piece).strip().replace("\n", ", "))