Spaces:

Flexy0
/

Veo-1.1x7B

Runtime error

App Files Files Community

Veo-1.1x7B / app.py

Flexy0

Update app.py

4839cae verified 7 months ago

raw

history blame contribute delete

2.03 kB

	import torch
	from datasets import load_dataset
	from transformers import (
	    AutoModelForSeq2SeqLM,
	    AutoModelForSequenceClassification,
	    AutoTokenizer,
	    Trainer,
	    TrainingArguments,
	)
	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	# FLAN-T5 is an encoder-decoder (text-to-text) model and this script trains
	# it on prompt -> response text, so it is loaded with its seq2seq language
	# modelling head rather than a sequence-classification head.
	model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").to(device)
	# Tokenizer from the same checkpoint, so token ids match the model's vocabulary.
	tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")



	# Load the dataset by name with `datasets.load_dataset`; only its "train"
	# split is used by the rest of this script.
	dataset = load_dataset("athirdpath/DPO_Pairs-Roleplay-Alpaca-NSFW")
	train_dataset = dataset["train"]

	# Fixed token length: both tokenizer calls in `preprocess` truncate and pad
	# to this many tokens.
	max_length = 512 # You can change this according to your needs
	def preprocess(example):
	    """Tokenize prompts and their chosen responses for seq2seq training.

	    Accepts either a single example (``example["prompt"]`` and
	    ``example["chosen"]`` are strings) or a batch as passed by
	    ``Dataset.map(..., batched=True)`` (lists of strings).

	    Returns a dict of plain Python lists with the keys ``input_ids`` and
	    ``attention_mask`` (from the prompt) and ``labels`` (from the chosen
	    response, with padding positions replaced by -100).
	    """
	    # Plain lists are returned on purpose: `Dataset.map` stores the columns
	    # itself and the Trainer moves each batch to the model's device, so
	    # building tensors here is unnecessary. Squeezing them was also harmful:
	    # it collapsed the batch dimension whenever a batch held one example.
	    encoded_prompt = tokenizer(
	        example["prompt"],
	        truncation=True,
	        max_length=max_length,
	        padding="max_length",
	    )
	    encoded_target = tokenizer(
	        example["chosen"],
	        truncation=True,
	        max_length=max_length,
	        padding="max_length",
	    )

	    pad_id = tokenizer.pad_token_id

	    def mask_padding(token_ids):
	        # -100 is the label value the loss ignores, so padded positions do
	        # not contribute to the training signal.
	        return [-100 if token_id == pad_id else token_id for token_id in token_ids]

	    target_ids = encoded_target["input_ids"]
	    if isinstance(example["chosen"], str):
	        # Single example: one flat list of token ids.
	        labels = mask_padding(target_ids)
	    else:
	        # Batch: one list of token ids per example.
	        labels = [mask_padding(ids) for ids in target_ids]

	    # The model reads its targets from the "labels" key.
	    return {
	        "input_ids": encoded_prompt["input_ids"],
	        "attention_mask": encoded_prompt["attention_mask"],
	        "labels": labels,
	    }

	# Tokenize the training split with `preprocess`, passing examples in batches
	# (only the train split is processed here).
	train_dataset = train_dataset.map(preprocess, batched=True)



	# Hyperparameters and bookkeeping options handed to the Trainer, collected
	# in one mapping so they can be read at a glance.
	training_options = {
	    # Where checkpoints and logs are written.
	    "output_dir": "output",
	    "logging_dir": "logs",
	    # Optimisation schedule.
	    "num_train_epochs": 3,
	    "learning_rate": 5e-5,
	    "per_device_train_batch_size": 8,
	    "per_device_eval_batch_size": 8,
	    # Evaluate and save on the same step-based schedule, and reload the best
	    # checkpoint once training finishes.
	    "evaluation_strategy": "steps",
	    "save_strategy": "steps",
	    "load_best_model_at_end": True,
	}
	training_args = TrainingArguments(**training_options)

	# Hold out 10% of the tokenized examples (fixed seed so the split is
	# reproducible). The training arguments evaluate during training and reload
	# the best checkpoint at the end, and both need an evaluation set; the
	# final evaluation below also needs `test_dataset` to exist.
	split = train_dataset.train_test_split(test_size=0.1, seed=42)
	test_dataset = split["test"]

	# Define the trainer
	trainer = Trainer(
	    model=model,  # The model to train
	    args=training_args,  # The training arguments
	    train_dataset=split["train"],  # The training portion of the split
	    eval_dataset=test_dataset,  # The held-out portion used for evaluation
	)

	# Train the model
	trainer.train()

	# Evaluate the model on the held-out set
	trainer.evaluate(test_dataset)