from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import Dataset
import json
import torch
import os

# Keep model downloads local to the project directory.
os.environ["TRANSFORMERS_CACHE"] = "./cache"

# The JSON file is expected to be a list of records, each carrying a
# "completion" field (see dataset_text_field below). SFTTrainer needs a
# datasets.Dataset, not a raw Python list, so wrap the parsed JSON.
with open("finetuning_set_v1.0.json") as f:
    dataset = Dataset.from_list(json.load(f))

# torch.cuda.empty_cache()
# torch.cuda.set_per_process_memory_fraction(0.9)

args = TrainingArguments(
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    output_dir="Salesforce-codegen2-1B-text-to-neurapy",
    gradient_accumulation_steps=1,
    num_train_epochs=8,
)

# Note: passing dataset_text_field / max_seq_length / packing directly to
# SFTTrainer matches older TRL releases; newer ones expect them on SFTConfig.
trainer = SFTTrainer(
    "Salesforce/codegen2-1B",
    train_dataset=dataset,
    dataset_text_field="completion",
    max_seq_length=1024,
    args=args,
    model_init_kwargs={
        "torch_dtype": torch.bfloat16,
        # CodeGen2 ships custom modeling code on the Hub.
        "trust_remote_code": True,
    },
    packing=True,
)

trainer.train()

# Testing without saving; uncomment for an actual training run.
# trainer.save_model("Salesforce-codegen2-1B-text-to-neurapy")
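
# Optional inference sketch (an assumption, not part of the original script):
# once save_model has been run, the fine-tuned checkpoint can be reloaded for
# generation roughly like this. The prompt string is a placeholder and should
# match the format of the training completions.
#
# from transformers import AutoModelForCausalLM, AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen2-1B")
# model = AutoModelForCausalLM.from_pretrained(
#     "Salesforce-codegen2-1B-text-to-neurapy",
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,  # CodeGen2 uses custom modeling code
# )
# inputs = tokenizer("<your text-to-neurapy prompt>", return_tensors="pt")
# output = model.generate(**inputs, max_new_tokens=128)
# print(tokenizer.decode(output[0], skip_special_tokens=True))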