# Veo-1.1x7B / app.py
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments
import torch
from datasets import load_dataset
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# FLAN-T5 is an encoder-decoder model and this script fine-tunes it to generate
# the "chosen" response from the prompt, so load it for seq2seq language
# modeling, not sequence classification.
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").to(device)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
dataset = load_dataset("athirdpath/DPO_Pairs-Roleplay-Alpaca-NSFW")
train_dataset = dataset["train"]
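# Optional sanity check: the preprocessing below assumes the split exposes
# "prompt" and "chosen" text columns, as the original code does.
print(train_dataset.column_names)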
max_length = 512 # You can change this according to your needs
def preprocess(batch):
    # With batched=True each field is a list of strings; return plain Python
    # lists (no return_tensors / device moves), since map() serializes the
    # output and the Trainer batches and moves tensors to the device itself.
    model_inputs = tokenizer(batch["prompt"], truncation=True, max_length=max_length, padding="max_length")
    targets = tokenizer(batch["chosen"], truncation=True, max_length=max_length, padding="max_length")
    # Use the target token ids as labels, masking padding with -100 so it is
    # ignored by the cross-entropy loss.
    model_inputs["labels"] = [
        [(tok if tok != tokenizer.pad_token_id else -100) for tok in seq]
        for seq in targets["input_ids"]
    ]
    return model_inputs
# Tokenize the training split, dropping the raw text columns, then carve out a
# small held-out split so the eval/save strategy and final evaluation have data.
train_dataset = train_dataset.map(preprocess, batched=True, remove_columns=train_dataset.column_names)
splits = train_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = splits["train"], splits["test"]
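# Illustrative check (safe to delete): decode one preprocessed example to
# confirm the prompt/label pairing looks right.
sample = train_dataset[0]
print("input :", tokenizer.decode(sample["input_ids"], skip_special_tokens=True)[:120])
label_ids = [tok for tok in sample["labels"] if tok != -100]
print("target:", tokenizer.decode(label_ids, skip_special_tokens=True)[:120])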
# Define the training arguments
training_args = TrainingArguments(
    output_dir="output",
    num_train_epochs=3,
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="steps",  # evaluate every `eval_steps` (defaults to 500)
    save_strategy="steps",        # must match evaluation_strategy for load_best_model_at_end
    logging_dir="logs",
    load_best_model_at_end=True,  # reload the best (lowest eval loss) checkpoint after training
)
# Define the trainer
trainer = Trainer(
    model=model,                  # The model to train
    args=training_args,           # The training arguments
    train_dataset=train_dataset,  # Tokenized training split
    eval_dataset=eval_dataset,    # Held-out split; required by evaluation_strategy and load_best_model_at_end
)
# Train the model
trainer.train()
# Evaluate the model on the held-out split
trainer.evaluate(eval_dataset)
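# Minimal usage sketch after training, assuming the goal is text generation
# with the fine-tuned model; the prompt string is a made-up placeholder.
model.eval()
prompt = "Write a short in-character reply to: 'Hello, traveler.'"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))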