Fix(tokenizer): Set rstrip,lstrip,norm to False (#678)
src/axolotl/utils/models.py (+10 -2)
@@ -11,6 +11,7 @@ from optimum.bettertransformer import BetterTransformer
 from peft import PeftConfig, prepare_model_for_kbit_training
 from peft.tuners.lora import QuantLinear
 from transformers import (  # noqa: F401
+    AddedToken,
     AutoConfig,
     AutoModelForCausalLM,
     AutoTokenizer,
@@ -82,9 +83,16 @@ def load_tokenizer(cfg):
 
     if cfg.special_tokens:
         for k, val in cfg.special_tokens.items():
-            tokenizer.add_special_tokens({k: val})
+            tokenizer.add_special_tokens(
+                {k: AddedToken(val, rstrip=False, lstrip=False, normalized=False)}
+            )
     if cfg.tokens:
-        tokenizer.add_tokens(list(cfg.tokens))
+        tokenizer.add_tokens(
+            [
+                AddedToken(token, rstrip=False, lstrip=False, normalized=False)
+                for token in cfg.tokens
+            ]
+        )
 
     return tokenizer
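For context: AddedToken (re-exported by transformers from the tokenizers library) controls how an added token is matched in raw text. The lstrip/rstrip flags decide whether whitespace adjacent to the token is absorbed into it, and normalized decides whether matching runs against the normalizer's output rather than the raw string. Below is a minimal sketch of the behavior this commit pins down; the gpt2 checkpoint and the <|mytok|> token are placeholders for illustration, not part of the change:

    from transformers import AddedToken, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    # Mirror the commit: disable whitespace stripping and normalization so the
    # token is matched verbatim and never swallows neighboring spaces.
    tokenizer.add_tokens(
        [AddedToken("<|mytok|>", rstrip=False, lstrip=False, normalized=False)]
    )

    print(tokenizer.tokenize("foo <|mytok|> bar"))
    # The custom token comes back as a single piece; the surrounding spaces
    # are handled by the base tokenizer rather than stripped into the token.

If the flags are left at their defaults, the tokenizer may strip the whitespace around an added token or normalize it before matching, which silently changes what the model sees during fine-tuning; pinning all three to False keeps added tokens byte-for-byte faithful to the config.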