nroggendorff committed on
Commit
f81694f
·
verified ·
1 Parent(s): f5014ce

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +2 -3
train.py CHANGED
@@ -47,7 +47,7 @@ def create_tokenizer(training_corpus):
47
  fast_tokenizer = PreTrainedTokenizerFast(tokenizer_object=tokenizer._tokenizer)
48
  return fast_tokenizer
49
 
50
- def load_tokenizer(training_corpus):
51
  tokenizer = AutoTokenizer.from_pretrained(OUTPUT_REPO)
52
  return tokenizer
53
 
@@ -175,8 +175,7 @@ def main(push_to_hub=True, is_inst_finetune=False):
175
  training_corpus = get_training_corpus(dataset)
176
  tokenizer = create_tokenizer(training_corpus)
177
  else:
178
- training_corpus = get_training_corpus(dataset)
179
- tokenizer = load_tokenizer(training_corpus)
180
  configure_tokenizer(tokenizer)
181
  if is_inst_finetune:
182
  model = load_model()
 
47
  fast_tokenizer = PreTrainedTokenizerFast(tokenizer_object=tokenizer._tokenizer)
48
  return fast_tokenizer
49
 
50
+ def load_tokenizer():
51
  tokenizer = AutoTokenizer.from_pretrained(OUTPUT_REPO)
52
  return tokenizer
53
 
 
175
  training_corpus = get_training_corpus(dataset)
176
  tokenizer = create_tokenizer(training_corpus)
177
  else:
178
+ tokenizer = load_tokenizer()
 
179
  configure_tokenizer(tokenizer)
180
  if is_inst_finetune:
181
  model = load_model()