import time

import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

dataset = load_dataset("csv", data_files="nowhere_training_input.csv", delimiter=";", split="train")

if torch.cuda.is_available():
    print("Cuda is available")

base_model_id = "abacaj/phi-2-super"
base_peft_id = "./results"
output_dir = "./results_phi-2-super"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    print("pad_token was missing and has been set to eos_token")

# Configuration to load the model in 4-bit quantization (NF4)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # bnb_4bit_compute_dtype="float16",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    attn_implementation="flash_attention_2",
    quantization_config=bnb_config,
    torch_dtype="auto",
)

# Load the existing LoRA adapter from base_peft_id and keep it trainable
model = PeftModel.from_pretrained(base_model, base_peft_id, is_trainable=True)
print(model)

# Gradient checkpointing to save memory
# model.gradient_checkpointing_enable()

# Freeze base model layers and cast layernorm in fp32
# model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

training_args = TrainingArguments(
    output_dir=output_dir,                    # Output directory for checkpoints and predictions
    overwrite_output_dir=True,                # Overwrite the content of the output directory
    per_device_train_batch_size=2,            # Batch size for training
    per_device_eval_batch_size=2,             # Batch size for evaluation
    gradient_accumulation_steps=5,            # Number of steps before optimizing
    gradient_checkpointing=True,              # Enable gradient checkpointing
    gradient_checkpointing_kwargs={"use_reentrant": False},
    warmup_steps=10,                          # Number of warmup steps
    # max_steps=1000,                         # Total number of training steps
    num_train_epochs=100,                     # Number of training epochs
    learning_rate=5e-5,                       # Learning rate
    weight_decay=0.01,                        # Weight decay
    optim="paged_adamw_8bit",                 # Quantize the optimizer state (paged 8-bit AdamW)
    bf16=True,                                # Use mixed precision training
    # For logging and saving
    logging_dir="./logs",
    logging_strategy="epoch",
    logging_steps=10,
    save_strategy="epoch",
    save_steps=10,
    save_total_limit=2,                       # Limit the total number of checkpoints
    evaluation_strategy="epoch",
    eval_steps=10,
    load_best_model_at_end=True,              # Load the best model at the end of training
    lr_scheduler_type="linear",
)


def formatting_func(data):
    # text = f"[INST] {data['prompt']} [/INST]{data['completion']}{tokenizer.eos_token} "
    chat = [
        {"role": "user", "content": data["prompt"]},
        {"role": "assistant", "content": data["completion"]},
    ]
    text = tokenizer.apply_chat_template(chat, tokenize=False)
    # print(text)
    data["text"] = text
    return data


dataset = dataset.map(formatting_func)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    eval_dataset=dataset,
    args=training_args,
    max_seq_length=1024,
    packing=True,
    dataset_text_field="text",
    neftune_noise_alpha=5,
)

model.config.use_cache = False  # Silence the warnings. Please re-enable for inference!

start_time = time.time()  # Record the start time
trainer.train()
end_time = time.time()  # Record the end time
training_time = end_time - start_time  # Calculate total training time

trainer.save_model(output_dir)
print(f"Training completed in {training_time} seconds.")
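
# ---------------------------------------------------------------------------
# Optional post-training sanity check: a minimal sketch, not part of the
# original script. It re-enables the KV cache that was disabled for training
# and generates once from the fine-tuned adapter still held in memory.
# The test prompt and generation settings below are assumptions, purely for
# illustration; adjust or remove as needed.
# ---------------------------------------------------------------------------
model.config.use_cache = True  # re-enable the cache for generation
model.eval()

test_prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Say hello in one short sentence."}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    generated = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(generated[0], skip_special_tokens=True))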