File size: 558 Bytes
032e687 |
1 2 3 4 5 6 7 8 9 10 11 12 |
from xtuner.engine.hooks import DatasetInfoHook
from ..dataset.utils import VPT_CONTEXT_TOKEN, VPT_START_TOKEN, VPT_END_TOKEN
class DatasetInfoHook_withSpecialTokens(DatasetInfoHook):
    """``DatasetInfoHook`` variant that registers a VPT special token.

    Construction forwards its arguments to ``DatasetInfoHook`` and then
    adds ``VPT_CONTEXT_TOKEN`` to the hook's tokenizer.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Initialise the parent hook, then register the special token.

        Args:
            tokenizer: Passed through unchanged to
                ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Passed through unchanged to
                ``DatasetInfoHook.__init__``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)
        # Presumably the parent constructor is what sets ``self.tokenizer``;
        # the registration below relies on it -- verify against
        # DatasetInfoHook.
        self._add_special_tokens()

    def _add_special_tokens(self):
        """Add ``VPT_CONTEXT_TOKEN`` to ``self.tokenizer`` as a special token.

        The value returned by ``add_tokens`` is not used.
        """
        # NOTE(review): only VPT_CONTEXT_TOKEN is registered here, although
        # VPT_START_TOKEN and VPT_END_TOKEN are imported by this module as
        # well -- confirm that leaving them out is intended.
        self.tokenizer.add_tokens([VPT_CONTEXT_TOKEN], special_tokens=True)