# Spaces: Sleeping  (hosting-page status text captured with this script; not part of the program)
import inspect

from datasets import DatasetDict, load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
# Pre-trained checkpoint used for both the tokenizer and the classifier.
MODEL_NAME = "indobenchmark/indobert-base-p2"

# Load Dataset
# The "csv" loader given a single `data_files` path puts every row into one
# "train" split, so a "validation" split has to be carved out here; otherwise
# the later `dataset['validation']` lookup raises KeyError.
dataset = load_dataset("csv", data_files="dataset.csv")
if "validation" not in dataset:
    # Hold out 10% of the rows for evaluation; the fixed seed keeps the split
    # reproducible between runs.
    held_out = dataset["train"].train_test_split(test_size=0.1, seed=42)
    dataset = DatasetDict(
        {"train": held_out["train"], "validation": held_out["test"]}
    )

# Tokenizer loaded from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def preprocess(data):
    """Tokenize the ``pertanyaan`` field of ``data``.

    The text is passed to the module-level ``tokenizer`` with
    ``padding="max_length"`` and ``truncation=True``; the tokenizer's output
    is returned unchanged.
    """
    questions = data['pertanyaan']
    encoded = tokenizer(questions, padding="max_length", truncation=True)
    return encoded
# Preprocessing
# Tokenize in batches, then expose the "jawaban" column under the name
# "labels" so it lines up with the columns selected below.
# NOTE(review): presumably "jawaban" already holds integer class ids - confirm.
dataset = dataset.map(preprocess, batched=True).rename_column("jawaban", "labels")
# Format the three model-input columns as torch tensors.
dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)
# Load Model
# Sequence-classification model built from the pre-trained checkpoint,
# configured for two labels.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)
# Training Arguments
# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and reject the old spelling, while older releases only know the old one.
# Pick whichever keyword the installed TrainingArguments actually accepts.
_accepted_args = inspect.signature(TrainingArguments).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _accepted_args else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    save_total_limit=2,
    **{_eval_strategy_key: "epoch"},  # run evaluation once per epoch
)
# Trainer
# Wire the model, the training arguments and the two dataset splits together.
train_split = dataset['train']
eval_split = dataset['validation']
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Train Model
trainer.train()
# Save Model
# The tokenizer is written next to the model files so that
# "./fine_tuned_model" can be loaded on its own with `from_pretrained`,
# without going back to the original checkpoint for the vocabulary.
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
print("Model telah dilatih ulang dan disimpan ke './fine_tuned_model'.")