Update the first argument of _batch_encode_plus to match the HF convention.
tokenizer.py (+2 -2)
@@ -101,9 +101,9 @@ class ByteTokenizer(PreTrainedTokenizer):
             verbose=kwargs.get('verbose', True),
         )

-    def _batch_encode_plus(self,
+    def _batch_encode_plus(self, batch_text_or_text_pairs: List[str], **kwargs) -> BatchEncoding:

-        input_ids = [(self.byte_tokenize(text).tolist(), None) for text in
+        input_ids = [(self.byte_tokenize(text).tolist(), None) for text in batch_text_or_text_pairs]

         return self._batch_prepare_for_model(
             input_ids,
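
For context, a minimal sketch of what the updated method plausibly looks like after this commit. The removed lines appear truncated by the diff viewer, so everything outside the visible hunk is an assumption rather than the repository's actual code: the body of byte_tokenize, the exact kwargs forwarded to _batch_prepare_for_model, and the class scaffolding are all reconstructed for illustration.

from typing import List, Optional, Tuple

import torch
from transformers import BatchEncoding, PreTrainedTokenizer


class ByteTokenizer(PreTrainedTokenizer):
    def byte_tokenize(self, text: str) -> torch.Tensor:
        # Hypothetical helper (its body is not shown in the diff): treat the
        # UTF-8 bytes of the text as token ids.
        return torch.tensor(list(text.encode("utf-8")), dtype=torch.long)

    def _batch_encode_plus(self, batch_text_or_text_pairs: List[str],
                           **kwargs) -> BatchEncoding:
        # Each entry is an (ids, pair_ids) tuple; None marks the absent pair
        # text, which is the shape _batch_prepare_for_model expects.
        input_ids: List[Tuple[List[int], Optional[List[int]]]] = [
            (self.byte_tokenize(text).tolist(), None)
            for text in batch_text_or_text_pairs
        ]
        # The forwarded kwargs are an assumption, modeled on the
        # verbose=kwargs.get('verbose', True) context visible above the hunk.
        return self._batch_prepare_for_model(
            input_ids,
            verbose=kwargs.get('verbose', True),
        )

The rename matters because the base class calls the hook with keyword arguments: PreTrainedTokenizerBase.batch_encode_plus invokes self._batch_encode_plus(batch_text_or_text_pairs=..., ...), so an override whose first parameter has any other name raises a TypeError when called through the standard tokenizer API.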