Spaces:
Starting
on
A100
Starting
on
A100
nroggendorff
committed on
Commit
•
1f732b8
1
Parent(s):
14e2d79
Update train.py
Browse files
train.py
CHANGED
@@ -63,7 +63,7 @@ def create_tokenizer(training_corpus):
|
|
63 |
return fast_tokenizer
|
64 |
|
65 |
def load_tokenizer():
|
66 |
-
return AutoTokenizer.from_pretrained(OUTPUT_REPO)
|
67 |
|
68 |
def get_training_corpus(dataset):
|
69 |
for i in range(0, len(dataset['text']), 1000):
|
@@ -105,7 +105,7 @@ def create_model(tokenizer):
|
|
105 |
return LlamaForCausalLM(config)
|
106 |
|
107 |
def load_model():
|
108 |
-
return AutoModelForCausalLM.from_pretrained(OUTPUT_REPO)
|
109 |
|
110 |
def configure_tokenizer(tokenizer):
|
111 |
special_tokens = {
|
|
|
63 |
return fast_tokenizer
|
64 |
|
65 |
def load_tokenizer():
|
66 |
+
return AutoTokenizer.from_pretrained(OUTPUT_REPO + '-it' if INSTRUCT_FINETUNE_BOOL else OUTPUT_REPO)
|
67 |
|
68 |
def get_training_corpus(dataset):
|
69 |
for i in range(0, len(dataset['text']), 1000):
|
|
|
105 |
return LlamaForCausalLM(config)
|
106 |
|
107 |
def load_model():
|
108 |
+
return AutoModelForCausalLM.from_pretrained(OUTPUT_REPO + '-it' if INSTRUCT_FINETUNE_BOOL else OUTPUT_REPO)
|
109 |
|
110 |
def configure_tokenizer(tokenizer):
|
111 |
special_tokens = {
|