from transformers import PhobertTokenizer | |
from pyvi import ViTokenizer | |
class CustomPhobertTokenizer(PhobertTokenizer): | |
def rdr_segment(self, text): | |
return ViTokenizer.tokenize(text) | |
def _tokenize(self, text): | |
segmented_text = self.rdr_segment(text) | |
return super()._tokenize(segmented_text) | |