K024 committed on
Commit
a108e1a
1 Parent(s): 5f4a3a2

Update tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +1 -0
tokenizer.py CHANGED
@@ -57,6 +57,7 @@ class ChatGLMTokenizer:
57
  prefix_mask += [1, 0]
58
 
59
  if text_pair is not None:
 
60
  pair_tokens = self.text_tokenizer.encode(text_pair)
61
  tokens += pair_tokens
62
  prefix_mask += [0] * len(pair_tokens)
 
57
  prefix_mask += [1, 0]
58
 
59
  if text_pair is not None:
60
+ text_pair = self.preprocess(text_pair, linebreak, whitespaces)
61
  pair_tokens = self.text_tokenizer.encode(text_pair)
62
  tokens += pair_tokens
63
  prefix_mask += [0] * len(pair_tokens)