felixdae
committed on
Commit
•
ea56387
1
Parent(s):
d3fe58f
Update tokenization_chatglm.py
Browse files
fix "piece id is out of range"
- tokenization_chatglm.py +2 -0
tokenization_chatglm.py
CHANGED
@@ -72,6 +72,8 @@ class SPTokenizer:
|
|
72 |
return self.index_special_tokens[index]
|
73 |
if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
|
74 |
return ""
|
|
|
|
|
75 |
return self.sp_model.IdToPiece(index)
|
76 |
|
77 |
|
|
|
72 |
return self.index_special_tokens[index]
|
73 |
if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
|
74 |
return ""
|
75 |
+
if index >= self.sp_model.vocab_size():
|
76 |
+
return ""
|
77 |
return self.sp_model.IdToPiece(index)
|
78 |
|
79 |
|