Nanobit committed on
Commit
10388a8
1 Parent(s): 9f7e8a9

fix(tokenizer): update log order after update (#806)

Browse files
Files changed (1) hide show
  1. src/axolotl/utils/models.py +5 -5
src/axolotl/utils/models.py CHANGED
@@ -72,11 +72,6 @@ def load_tokenizer(cfg):
72
  # set a pad_token, but use eos_token so we don't add a new token
73
  tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN
74
 
75
- LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
76
- LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
77
- LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
78
- LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
79
-
80
  if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
81
  tokenizer.add_special_tokens({"pad_token": "[PAD]"})
82
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -98,6 +93,11 @@ def load_tokenizer(cfg):
98
  ]
99
  )
100
 
 
 
 
 
 
101
  return tokenizer
102
 
103
 
 
72
  # set a pad_token, but use eos_token so we don't add a new token
73
  tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN
74
 
 
 
 
 
 
75
  if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
76
  tokenizer.add_special_tokens({"pad_token": "[PAD]"})
77
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
93
  ]
94
  )
95
 
96
+ LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
97
+ LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
98
+ LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
99
+ LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")
100
+
101
  return tokenizer
102
 
103