# original author: intervitens
"""Patch token pieces in a SentencePiece tokenizer model.

Loads ``./tokenizer.model``, rewrites pieces 11 and 12 to the ChatML
control tokens ``<|im_start|>`` and ``<|im_end|>``, and writes the
patched proto to ``tokenizer_fixed.model``.
"""
import sentencepiece.sentencepiece_model_pb2 as model


def fix_tokenizer(src='./tokenizer.model',
                  dst='tokenizer_fixed.model',
                  replacements=None):
    """Replace tokenizer pieces by index and write the patched model.

    Args:
        src: Path to the input SentencePiece model file.
        dst: Path the patched model is written to.
        replacements: Mapping of piece index -> new piece string.
            Defaults to the ChatML tokens at indices 11 and 12,
            matching the original script's behavior.
    """
    # None sentinel instead of a mutable dict default.
    if replacements is None:
        replacements = {11: '<|im_start|>', 12: '<|im_end|>'}
    m = model.ModelProto()
    # `with` closes the source file deterministically; the original
    # `open(...).read()` left the handle to the garbage collector.
    with open(src, 'rb') as f:
        m.ParseFromString(f.read())
    for index, piece in replacements.items():
        m.pieces[index].piece = piece
    with open(dst, 'wb') as f:
        f.write(m.SerializeToString())


if __name__ == '__main__':
    fix_tokenizer()