""" | |
## Wrapper
__all__ = ['icetk']
sentencepiece_model_pb2 is wrapped up here.
## ss
ss
""" | |
import sentencepiece as spm | |
from icetk import sentencepiece_model_pb2 as model | |
def __init__(self, model_path):
    """Load a serialized model proto from disk and build the processor.

    Args:
        model_path: Path of the serialized model file. It is opened in
            binary mode and its bytes are parsed into ``self.proto``.
    """
    self.proto = model.ModelProto()
    with open(model_path, 'rb') as fin:
        proto_str = fin.read()
    # The bytes are fully in memory, so parsing happens after the file
    # handle has been released.
    self.proto.ParseFromString(proto_str)
    # Populate self.sp and self.num_tokens from the parsed proto.
    self.refresh()
def refresh(self):
    """Rebuild the SentencePiece processor from the current ``self.proto``.

    Creates a new ``SentencePieceProcessor`` on ``self.sp``, loads the
    serialized form of ``self.proto`` into it, and stores the processor's
    vocabulary size in ``self.num_tokens``.
    """
    self.sp = spm.SentencePieceProcessor()
    # Load from the in-memory serialized proto rather than from a file.
    self.sp.Load(model_proto=self.proto.SerializeToString())
    self.num_tokens = self.sp.vocab_size()