|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments |
|
|
|
|
|
# Path to the newline-delimited questions file (machine-specific Windows path;
# adjust per environment).
questions_file = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt'

# Read one question per line. An explicit UTF-8 encoding avoids the
# platform-dependent default (cp1252 on Windows) silently mangling or
# rejecting non-ASCII text; splitlines() drops the trailing newlines.
with open(questions_file, 'r', encoding='utf-8') as f:
    questions = f.read().splitlines()
|
|
|
|
|
def custom_tokenizer(text):
    """Split *text* into whitespace-delimited tokens.

    Parameters
    ----------
    text : str
        Raw question text.

    Returns
    -------
    list[str]
        The tokens of *text*. ``str.split()`` with no argument collapses
        runs of whitespace, so the result never contains empty strings.
    """
    return text.split()
|
|
|
|
|
tokenized_questions = [custom_tokenizer(question) for question in questions] |
|
|
|
|
|
model = AutoModelForSeq2SeqLM.from_pretrained('C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained model.pt') |
|
|
|
|
|
# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='./results',              # checkpoints and logs are written here
    # The Trainer below is constructed without an eval_dataset, so requesting
    # per-epoch evaluation ('epoch') would raise once evaluation is attempted;
    # disable evaluation until an eval set is supplied.
    evaluation_strategy='no',
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,       # unused while evaluation is off; kept for compatibility
    num_train_epochs=1,
    weight_decay=0.01,
)
|
|
|
|
|
# NOTE(review): Trainer expects a dataset of feature dicts (input_ids,
# attention_mask, labels) produced by the model's own tokenizer; a plain
# list of token-string lists will fail inside the default data collator.
# Confirm the intended preprocessing (e.g. AutoTokenizer on the raw
# questions) before running this.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_questions,
)

# Run the fine-tuning loop with the arguments configured above.
trainer.train()
|
|
|
|
|
# Directory where the fine-tuned model (config + weights) is written;
# save_pretrained() creates it if needed and writes a loadable checkpoint.
model_path = './trained_model'
model.save_pretrained(model_path)
|
|