# DenseLabelDev/projects/colva/engine/dataset_info_hook.py
# zhouyik's picture
# Upload folder using huggingface_hub
# 032e687 verified
from xtuner.engine.hooks import DatasetInfoHook
from ..dataset.utils import VPT_CONTEXT_TOKEN, VPT_START_TOKEN, VPT_END_TOKEN
class DatasetInfoHook_withSpecialTokens(DatasetInfoHook):
    """``DatasetInfoHook`` variant that adds the VPT context token on creation.

    Once the parent initializer has run, ``VPT_CONTEXT_TOKEN`` is added to
    ``self.tokenizer`` as a special token.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Initialize the parent hook, then add the extra special token.

        Args:
            tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Forwarded unchanged to
                ``DatasetInfoHook.__init__``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)
        self._add_special_tokens()

    def _add_special_tokens(self):
        """Add ``VPT_CONTEXT_TOKEN`` to ``self.tokenizer`` as a special token.

        ``self.tokenizer`` is presumably populated by the parent initializer
        (TODO confirm against ``DatasetInfoHook``). The value returned by
        ``add_tokens`` is not used.
        """
        self.tokenizer.add_tokens([VPT_CONTEXT_TOKEN], special_tokens=True)