"""
## Wrapper

__all__ = ['icetk']
sentencepiece_model_pb2 has been wrapped up,

## ss

ss
"""
import sentencepiece as spm
from icetk import sentencepiece_model_pb2 as model


# NOTE(review): both functions below take `self`, but no enclosing class
# header is visible in this view -- presumably they are methods of a class
# declared elsewhere; confirm before relying on them as free functions.
def __init__(self, model_path):
    """Parse a serialized model file into ``self.proto`` and build the processor.

    Args:
        model_path: Path of the file that is opened in binary mode and whose
            bytes are parsed into a ``ModelProto`` message.
    """
    self.proto = model.ModelProto()
    with open(model_path, 'rb') as model_file:
        # Feed the raw bytes straight into the protobuf message.
        self.proto.ParseFromString(model_file.read())
    self.refresh()


def refresh(self):
    """Rebuild ``self.sp`` from ``self.proto`` and update ``self.num_tokens``.

    A fresh ``SentencePieceProcessor`` is created, loaded from the
    re-serialized ``self.proto``, and its vocabulary size is stored in
    ``self.num_tokens``.
    """
    processor = self.sp = spm.SentencePieceProcessor()
    processor.Load(model_proto=self.proto.SerializeToString())
    self.num_tokens = processor.vocab_size()