p1atdev committed on
Commit
48cea3d
1 Parent(s): b421feb

chore: strip before tokenize

Browse files
Files changed (1) hide show
  1. v2.py +1 -1
v2.py CHANGED
@@ -55,7 +55,7 @@ def normalize_tags(tokenizer: PreTrainedTokenizerBase, tags: str):
55
  [
56
  token
57
  for token in tokenizer.encode_plus(
58
- tags,
59
  return_tensors="pt",
60
  ).input_ids[0]
61
  if int(token) != tokenizer.unk_token_id
 
55
  [
56
  token
57
  for token in tokenizer.encode_plus(
58
+ tags.strip(),
59
  return_tensors="pt",
60
  ).input_ids[0]
61
  if int(token) != tokenizer.unk_token_id