|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments |
|
|
|
|
|
# Path to the newline-delimited questions file (machine-specific Windows path;
# adjust per environment).
questions_file = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt'

# Read one question per line. An explicit UTF-8 encoding avoids the
# platform-dependent default (cp1252 on Windows) silently mangling or
# rejecting non-ASCII text; splitlines() drops the trailing newlines.
with open(questions_file, 'r', encoding='utf-8') as f:
    questions = f.read().splitlines()
|
|
|
|
|
def custom_tokenizer(text):
    """Split *text* into whitespace-delimited tokens.

    Parameters
    ----------
    text : str
        Raw question text.

    Returns
    -------
    list[str]
        The tokens of *text*. ``str.split()`` with no argument collapses
        runs of whitespace, so the result never contains empty strings.
    """
    return text.split()
|
|
|
|
|
tokenized_questions = [custom_tokenizer(question) for question in questions] |
|
|
|
|
|
model = AutoModelForSeq2SeqLM.from_pretrained('C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained model.pt') |
|
|
|
|
|
# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='./results',              # checkpoints and logs are written here
    # The Trainer below is constructed without an eval_dataset, so requesting
    # per-epoch evaluation ('epoch') would raise once evaluation is attempted;
    # disable evaluation until an eval set is supplied.
    evaluation_strategy='no',
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,       # unused while evaluation is off; kept for compatibility
    num_train_epochs=1,
    weight_decay=0.01,
)
|
|
|
|
|
# NOTE(review): Trainer expects a dataset of feature dicts (input_ids,
# attention_mask, labels) produced by the model's own tokenizer; a plain
# list of token-string lists will fail inside the default data collator.
# Confirm the intended preprocessing (e.g. AutoTokenizer on the raw
# questions) before running this.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_questions,
)

# Run the fine-tuning loop with the arguments configured above.
trainer.train()
|
|
|
|
|
# Directory where the fine-tuned model (config + weights) is written;
# save_pretrained() creates it if needed and writes a loadable checkpoint.
model_path = './trained_model'
model.save_pretrained(model_path)
|
|