# RuttoniAI / train.py
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import DataCollatorForSeq2Seq, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
# Load the CSV file
df = pd.read_csv("ruttoniaitrain1.csv")
# Rename the columns
df = df.rename(columns={"Quest": "text", "Answer": "target"})
# Split into train/validation sets and convert each split to a Hugging Face Dataset
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)
print("CSV Processed and loaded!")
# Initialize the tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
print("Model Loaded!")
# Tokenize and format the data; padding is left to the data collator so each
# batch is padded dynamically and padded label positions can be masked out.
def preprocess_function(examples):
    inputs = tokenizer(
        examples['text'],
        truncation=True,
        max_length=512
    )
    targets = tokenizer(
        examples['target'],
        truncation=True,
        max_length=32
    )
    examples['input_ids'] = inputs['input_ids']
    examples['attention_mask'] = inputs['attention_mask']
    examples['labels'] = targets['input_ids']
    return examples
train_dataset = train_dataset.map(preprocess_function, batched=True)
val_dataset = val_dataset.map(preprocess_function, batched=True)
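# Optional sanity check (a minimal sketch added here, not in the original upload):
# inspect one preprocessed example to confirm input_ids, attention_mask and labels
# are present and within the expected length limits.
sample = train_dataset[0]
print({k: len(v) for k, v in sample.items() if isinstance(v, list)})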
training_args = TrainingArguments(
    output_dir='./Ruttoni_AI',
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=1e-4,
    warmup_steps=500,
    evaluation_strategy='steps',
    eval_steps=500,
    save_steps=500,
    save_total_limit=2,
    logging_dir='./logs',
    logging_steps=100,
    report_to='none'
)
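# Note (added comment): with evaluation_strategy='steps' and eval_steps=500 the
# Trainer evaluates on val_dataset every 500 optimization steps, while
# save_steps/save_total_limit control checkpointing independently.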
print("Arguments and functions initialized!")
# DataCollatorForSeq2Seq pads inputs dynamically per batch and pads labels with
# -100 so the padded label positions are ignored by the loss.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)
print("Training...")
trainer.train()
print("Saving...")
trainer.save_model("./Ruttoni_AI")
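# Minimal inference sketch (an illustrative addition, not part of the original
# script): reload the saved checkpoint and generate an answer for one question.
# The question string below is a made-up placeholder.
trained_tokenizer = T5Tokenizer.from_pretrained("./Ruttoni_AI")
trained_model = T5ForConditionalGeneration.from_pretrained("./Ruttoni_AI")
sample_inputs = trained_tokenizer("What is RuttoniAI?", return_tensors="pt")
sample_outputs = trained_model.generate(**sample_inputs, max_new_tokens=32)
print(trained_tokenizer.decode(sample_outputs[0], skip_special_tokens=True))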