# original author: intervitens
"""Patch a SentencePiece tokenizer model for ChatML.

Reads ./tokenizer.model, renames two existing pieces to the ChatML
special tokens <|im_start|> and <|im_end|>, and writes the result to
tokenizer_fixed.model. The original file is left untouched.
"""
import sentencepiece.sentencepiece_model_pb2 as model

# Vocabulary slots being repurposed as chat-template markers.
# NOTE(review): indices 11/12 are model-specific — confirm they are
# unused/placeholder pieces in your tokenizer before running.
IM_START_INDEX = 11
IM_END_INDEX = 12

m = model.ModelProto()
# Use a context manager so the handle is closed deterministically,
# not just when the file object happens to be garbage-collected.
with open('./tokenizer.model', 'rb') as f:
    m.ParseFromString(f.read())

m.pieces[IM_START_INDEX].piece = '<|im_start|>'
m.pieces[IM_END_INDEX].piece = '<|im_end|>'

with open('tokenizer_fixed.model', 'wb') as f:
    f.write(m.SerializeToString())