from transformers import AlbertForSequenceClassification, AlbertTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
# Load a dataset (replace with your dataset; sequence classification
# needs a "label" column in addition to the raw text)
dataset = load_dataset("text", data_files={"train": "path/to/train.txt", "test": "path/to/test.txt"})
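# NOTE: the generic "text" builder yields only a "text" column. One
# hypothetical alternative is a CSV with "text" and "label" columns:
# dataset = load_dataset("csv", data_files={"train": "path/to/train.csv", "test": "path/to/test.csv"})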
# Preprocess the dataset (tokenization, formatting, etc.)
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")

def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(preprocess_function, batched=True)
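# After map(), each split carries input_ids, attention_mask, and
# token_type_ids columns, which the Trainer feeds to the model.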
# Load the model
model = AlbertForSequenceClassification.from_pretrained("albert-base-v2", num_labels=2)  # Adjust num_labels as needed
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    evaluation_strategy="epoch",  # replaces the removed evaluate_during_training flag; newer releases rename this to eval_strategy
    logging_dir="./logs",
)
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)
# Train the model
trainer.train()

# Save the fine-tuned model (and tokenizer, so both reload from one directory)
model.save_pretrained("path/to/save/model")
tokenizer.save_pretrained("path/to/save/model")
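# A minimal inference sketch, assuming the save path above and a
# hypothetical example sentence:
import torch

model = AlbertForSequenceClassification.from_pretrained("path/to/save/model")
tokenizer = AlbertTokenizer.from_pretrained("path/to/save/model")
inputs = tokenizer("Example sentence to classify.", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
predicted_label = logits.argmax(dim=-1).item()  # index into your label set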