nroggendorff committed on
Commit
40853aa
1 Parent(s): 3fa6168

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +1 -4
train.py CHANGED
@@ -110,10 +110,7 @@ def configure_tokenizer(tokenizer):
110
  special_tokens["additional_special_tokens"] = ["<|user|>", "<|bot|>", "<|end|>"]
111
  tokenizer.add_special_tokens(special_tokens)
112
 
113
- tokenizer.vocab = {k: v + 1 for k, v in tokenizer.vocab.items()}
114
- tokenizer.ids_to_tokens = {v: k for k, v in tokenizer.vocab.items()}
115
-
116
- tokenizer.pad_token_id = 1
117
 
118
  if INSTRUCT_FINETUNE_BOOL:
119
  tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")
 
110
  special_tokens["additional_special_tokens"] = ["<|user|>", "<|bot|>", "<|end|>"]
111
  tokenizer.add_special_tokens(special_tokens)
112
 
113
+ tokenizer.pad_token_id = MAX_SEQ_LENGTH - 1
 
 
 
114
 
115
  if INSTRUCT_FINETUNE_BOOL:
116
  tokenizer.user_token_id = tokenizer.convert_tokens_to_ids("<|user|>")