duzx16 commited on
Commit
833de79
1 Parent(s): 382cc70

Fix tokenizer for transformers 4.34

Browse files
Files changed (1) hide show
  1. tokenization_chatglm.py +2 -2
tokenization_chatglm.py CHANGED
@@ -65,8 +65,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
65
 
66
  model_input_names = ["input_ids", "attention_mask", "position_ids"]
67
 
68
- def __init__(self, vocab_file, padding_side="left", **kwargs):
69
- super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=False, **kwargs)
70
  self.name = "GLMTokenizer"
71
 
72
  self.vocab_file = vocab_file
@@ -76,6 +75,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
76
  "<eos>": self.tokenizer.eos_id,
77
  "<pad>": self.tokenizer.pad_id
78
  }
 
79
 
80
  def get_command(self, token):
81
  if token in self.special_tokens:
 
65
 
66
  model_input_names = ["input_ids", "attention_mask", "position_ids"]
67
 
68
+ def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, **kwargs):
 
69
  self.name = "GLMTokenizer"
70
 
71
  self.vocab_file = vocab_file
 
75
  "<eos>": self.tokenizer.eos_id,
76
  "<pad>": self.tokenizer.pad_id
77
  }
78
+ super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
79
 
80
  def get_command(self, token):
81
  if token in self.special_tokens: