ramkrithiks committed on
Commit
154b411
1 Parent(s): d61d475

Update _decode method to accept integer element and convert it to sequence

Browse files

#### Why did I create this pull request?
When I tried to run this model with TGI, the model warm-up step failed with the error `cannot convert 'int' to 'sequence'`.
#### What are the proposed changes?
Removed the `List[int]` type annotation from the `token_ids` parameter of `_decode` and added an explicit check that wraps `token_ids` in a list when it is not already a list (e.g. a single integer).
#### How did I test it?
I made the changes in a local copy of the repo on my machine and was able to launch the model successfully with TGI.

Files changed (1) hide show
  1. tokenization_xgen.py +3 -1
tokenization_xgen.py CHANGED
@@ -169,7 +169,9 @@ class XgenTokenizer(PreTrainedTokenizer):
169
  """Converts an index (integer) in a token (str) using the vocab."""
170
  return self.encoder.decode_single_token_bytes(index).decode("utf-8")
171
 
172
- def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
 
 
173
  if skip_special_tokens:
174
  token_ids = [t for t in token_ids if t not in self.all_special_ids]
175
  return self.encoder.decode(token_ids)
 
169
  """Converts an index (integer) in a token (str) using the vocab."""
170
  return self.encoder.decode_single_token_bytes(index).decode("utf-8")
171
 
172
+ def _decode(self, token_ids, skip_special_tokens: bool = False, **kwargs):
173
+ if not isinstance(token_ids, list):
174
+ token_ids = [token_ids]
175
  if skip_special_tokens:
176
  token_ids = [t for t in token_ids if t not in self.all_special_ids]
177
  return self.encoder.decode(token_ids)