katuni4ka committed on
Commit
9cb12a8
1 Parent(s): 0be9a13

Update tokenization_chatglm.py

Browse files
Files changed (1) hide show
  1. tokenization_chatglm.py +2 -5
tokenization_chatglm.py CHANGED
@@ -67,18 +67,15 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
67
 
68
  def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, **kwargs):
69
  self.name = "GLMTokenizer"
70
-
71
  self.vocab_file = vocab_file
72
  self.tokenizer = SPTokenizer(vocab_file)
73
- kwargs.pop("eos_token", None)
74
- kwargs.pop("pad_token", None)
75
- kwargs.pop("unk_token", None)
76
  self.special_tokens = {
77
  "<bos>": self.tokenizer.bos_id,
78
  "<eos>": self.tokenizer.eos_id,
79
  "<pad>": self.tokenizer.pad_id
80
  }
81
- super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
82
 
83
  def get_command(self, token):
84
  if token in self.special_tokens:
 
67
 
68
  def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, **kwargs):
69
  self.name = "GLMTokenizer"
70
+ super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
71
  self.vocab_file = vocab_file
72
  self.tokenizer = SPTokenizer(vocab_file)
 
 
 
73
  self.special_tokens = {
74
  "<bos>": self.tokenizer.bos_id,
75
  "<eos>": self.tokenizer.eos_id,
76
  "<pad>": self.tokenizer.pad_id
77
  }
78
+
79
 
80
  def get_command(self, token):
81
  if token in self.special_tokens: