pragaash committed on
Commit
28fe393
1 Parent(s): 60eb4c7

Rename the first argument of `_batch_encode_plus` from `batch_text` to `batch_text_or_text_pairs` to match the Hugging Face (HF) convention.

Browse files
Files changed (1) hide show
  1. tokenizer.py +2 -2
tokenizer.py CHANGED
@@ -101,9 +101,9 @@ class ByteTokenizer(PreTrainedTokenizer):
101
  verbose=kwargs.get('verbose', True),
102
  )
103
 
104
- def _batch_encode_plus(self, batch_text: List[str], **kwargs) -> BatchEncoding:
105
 
106
- input_ids = [(self.byte_tokenize(text).tolist(), None) for text in batch_text]
107
 
108
  return self._batch_prepare_for_model(
109
  input_ids,
 
101
  verbose=kwargs.get('verbose', True),
102
  )
103
 
104
+ def _batch_encode_plus(self, batch_text_or_text_pairs: List[str], **kwargs) -> BatchEncoding:
105
 
106
+ input_ids = [(self.byte_tokenize(text).tolist(), None) for text in batch_text_or_text_pairs]
107
 
108
  return self._batch_prepare_for_model(
109
  input_ids,