File size: 325 Bytes
c1d85a2
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
from transformers import PhobertTokenizer
from pyvi import ViTokenizer


class CustomPhobertTokenizer(PhobertTokenizer):
    """PhobertTokenizer subclass that word-segments text before tokenizing.

    Every string handed to ``_tokenize`` is first passed through
    ``rdr_segment`` and only then forwarded to the parent tokenizer.
    """

    def rdr_segment(self, text):
        """Return ``text`` after word segmentation.

        Despite the ``rdr`` in the name, the work is delegated to pyvi's
        ``ViTokenizer.tokenize``; the result is returned unchanged.
        NOTE(review): presumably pyvi joins the syllables of a
        multi-syllable word with underscores -- confirm against pyvi docs.
        """
        segmented = ViTokenizer.tokenize(text)
        return segmented

    def _tokenize(self, text):
        """Segment ``text`` and tokenize it with the parent implementation.

        Segmentation goes through ``self.rdr_segment`` so that a subclass
        overriding that hook also changes what gets tokenized here. The
        return value is whatever ``PhobertTokenizer._tokenize`` produces
        for the segmented string.
        """
        return super()._tokenize(self.rdr_segment(text))