# IndoBot-AI / fine_tune.py
# Uploaded by Sofa321 -- commit 2f4f974 (verified): "Create fine_tune.py"
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
# Pre-trained checkpoint that is fine-tuned by this script.
MODEL_NAME = "indobenchmark/indobert-base-p2"
# Load the dataset from a local CSV file.
dataset = load_dataset("csv", data_files="dataset.csv")
# A single file passed through `data_files` is loaded as one "train" split,
# yet the Trainer further down evaluates on dataset['validation']. When no
# validation split exists, hold out 10% of the rows for it (fixed seed so the
# split is reproducible between runs).
if "validation" not in dataset:
    _splits = dataset["train"].train_test_split(test_size=0.1, seed=42)
    dataset["train"] = _splits["train"]
    dataset["validation"] = _splits["test"]
# Tokenizer that belongs to the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def preprocess(data):
    """Tokenize the 'pertanyaan' column of a batch.

    Args:
        data: A batch of rows as handed over by ``dataset.map``; only its
            ``'pertanyaan'`` entry is read (presumably the question text --
            confirm against dataset.csv).

    Returns:
        The tokenizer output for the batch, padded with
        ``padding="max_length"`` and truncated.
    """
    questions = data['pertanyaan']
    encoded = tokenizer(questions, padding="max_length", truncation=True)
    return encoded
# Run `preprocess` over the dataset in batches, then expose the "jawaban"
# column under the name "labels".
# NOTE(review): with num_labels=2 below, "jawaban" presumably holds integer
# class ids (0/1) rather than free text -- confirm against dataset.csv.
dataset = dataset.map(preprocess, batched=True).rename_column("jawaban", "labels")
# Return torch tensors, restricted to the columns listed here.
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
# Sequence-classification model with a 2-label head, loaded from the checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)
# Training arguments.
# The keyword that schedules evaluation was renamed from `evaluation_strategy`
# to `eval_strategy` in newer transformers releases. Use whichever name the
# installed TrainingArguments declares so the script runs on both.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in getattr(TrainingArguments, "__dataclass_fields__", {})
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    save_total_limit=2,
    # Evaluate once per epoch.
    **{_eval_strategy_key: "epoch"},
)
# Trainer: trains on the 'train' split and evaluates on the 'validation' split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
)
# Train the model.
trainer.train()
# Save the model together with its tokenizer so that ./fine_tuned_model is a
# self-contained checkpoint that can be reloaded without the original hub
# tokenizer files.
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
print("Model telah dilatih ulang dan disimpan ke './fine_tuned_model'.")