Salesforce
/

xgen-7b-8k-base

Text Generation

Inference Endpoints

text-generation-inference

Model card Files Files and versions Community

tianxie-sf committed on Jul 2, 2023

Commit

3987e09

•

1 Parent(s): f12f979

_convert_id_to_token (#12)

- _convert_id_to_token (aa0c00600854f6330495cb368294c9983017ff2f)

Files changed (1) hide show

tokenization_xgen.py +2 -2

tokenization_xgen.py CHANGED Viewed

@@ -155,7 +155,7 @@ class XgenTokenizer(PreTrainedTokenizer):
     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (str) using the vocab."""
-        return self.encoder.decode_single_token_bytes(index)
     def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
         if skip_special_tokens:
@@ -231,4 +231,4 @@ class XgenTokenizer(PreTrainedTokenizer):
     # has no vocab file
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
-        return ()

     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (str) using the vocab."""
+        return self.encoder.decode_single_token_bytes(index).decode("utf-8")
     def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
         if skip_special_tokens:
     # has no vocab file
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
+        return ()