# Spaces status: Runtime error
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling | |
from datasets import load_dataset | |
# Load the tokenizer and reuse its end-of-sequence token as the padding
# token (padding is requested later in tokenize_function).
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Load the causal language model from the same checkpoint name.
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

# Keep only the first five rows of the "train_sft" split.
dataset = load_dataset("HuggingFaceH4/ultrachat_200k")["train_sft"].select(range(5))
def tokenize_function(examples):
    """Tokenize the "prompt" entry of ``examples`` with the module-level tokenizer.

    Args:
        examples: Object indexable by ``"prompt"``. In this script it is the
            batch that ``dataset.map(..., batched=True)`` passes in.

    Returns:
        The tokenizer's output for the prompts, produced with
        ``padding="max_length"`` and ``truncation=True``.
    """
    prompts = examples["prompt"]
    encoded = tokenizer(prompts, truncation=True, padding="max_length")
    return encoded
# Tokenize the selected examples in batches; `td` is the training set
# handed to the Trainer below.
td = dataset.map(tokenize_function, batched=True)

# Training configuration: output/log directories, batch size per device,
# and number of epochs. Everything else is left at the library defaults.
training_args = TrainingArguments(
    output_dir="./output",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_dir="./logs",
)

# mlm=False turns masked-language-model masking off in the collator
# (the model loaded above is a causal LM).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Fine-tune the model on the tokenized examples and write the result to
# the "fine_tuned_gpt2" directory.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=td,
)
trainer.train()
trainer.save_model("fine_tuned_gpt2")