# test-flex-gpt / tokenizer.py
from transformers import PreTrainedTokenizerFast


class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def prepare_for_model(
        self,
        ids,
        pair_ids=None,
        add_special_tokens=True,
        padding=False,
        truncation=False,
        max_length=None,
        stride=0,
        pad_to_multiple_of=None,
        return_tensors=None,
        return_token_type_ids=None,
        return_attention_mask=None,
        return_overflowing_tokens=False,
        return_special_tokens_mask=False,
        return_offsets_mapping=False,
        return_length=False,
        verbose=True,
        prepend_batch_axis=False,
        **kwargs,
    ):
        # Strip any EOS ids already present in the input so the parent
        # implementation does not append a duplicate EOS token when
        # add_special_tokens is requested.
        if add_special_tokens and self.eos_token_id in ids:
            ids = [token_id for token_id in ids if token_id != self.eos_token_id]
        return super().prepare_for_model(
            ids,
            pair_ids=pair_ids,
            add_special_tokens=add_special_tokens,
            padding=padding,
            truncation=truncation,
            max_length=max_length,
            stride=stride,
            pad_to_multiple_of=pad_to_multiple_of,
            return_tensors=return_tensors,
            return_token_type_ids=return_token_type_ids,
            return_attention_mask=return_attention_mask,
            return_overflowing_tokens=return_overflowing_tokens,
            return_special_tokens_mask=return_special_tokens_mask,
            return_offsets_mapping=return_offsets_mapping,
            return_length=return_length,
            verbose=verbose,
            prepend_batch_axis=prepend_batch_axis,
            **kwargs,
        )
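

# --- Usage sketch (assumption, not part of the original file) ---
# A minimal example of how this class might be exercised, assuming a compatible
# fast-tokenizer checkpoint has been saved under "./tokenizer" (the path is
# hypothetical). prepare_for_model() is invoked internally by __call__ /
# encode_plus, so the EOS de-duplication above applies transparently.
if __name__ == "__main__":
    # Load the tokenizer files with the custom class so the overridden
    # prepare_for_model is used during encoding.
    tokenizer = ModernDecoderBERTTokenizer.from_pretrained("./tokenizer")

    # Encode a sentence; any EOS ids already present in the raw id list are
    # removed before the parent class appends its own special tokens.
    encoding = tokenizer("hello world", add_special_tokens=True)
    print(encoding["input_ids"])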