File size: 558 Bytes
032e687
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
from xtuner.engine.hooks import DatasetInfoHook
from ..dataset.utils import VPT_CONTEXT_TOKEN, VPT_START_TOKEN, VPT_END_TOKEN

class DatasetInfoHook_withSpecialTokens(DatasetInfoHook):
    """DatasetInfoHook that also registers VPT_CONTEXT_TOKEN.

    Construction forwards both arguments to ``DatasetInfoHook.__init__`` and
    then adds ``VPT_CONTEXT_TOKEN`` to ``self.tokenizer`` as a special token.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Initialise the parent hook, then register the extra special token.

        Args:
            tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Forwarded unchanged to
                ``DatasetInfoHook.__init__``. Defaults to ``False``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)

        # NOTE(review): relies on the parent __init__ having assigned
        # ``self.tokenizer``; that code is not visible here -- confirm.
        self._add_special_tokens()

    def _add_special_tokens(self):
        """Add ``VPT_CONTEXT_TOKEN`` to ``self.tokenizer`` as a special token."""
        # The value returned by add_tokens was bound to a local that nothing
        # read, so it is no longer stored.
        # NOTE(review): VPT_START_TOKEN / VPT_END_TOKEN are imported by this
        # module but are not registered here -- confirm that is intended.
        self.tokenizer.add_tokens([VPT_CONTEXT_TOKEN], special_tokens=True)