import signal
import sys

import torch
from datasets import load_dataset
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer

# Importing Sophia optimizer
from sophia import SophiaG


# Signal handler function
def signal_handler(sig, frame):
    print('You pressed Ctrl+C! Exiting...')
    sys.exit(0)


# Register signal handler
signal.signal(signal.SIGINT, signal_handler)

# Load the dataset
dataset = load_dataset("Crystalcareai/Orca-Reka", split="train")

model_id = "./outkannn"
tokenizer_id = model_id

tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
tokenizer.padding_side = 'right'

# Alpaca-style prompt template used to format each example
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    EOS_TOKEN = tokenizer.eos_token
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}


# Process and map the formatting function
dataset = dataset.map(formatting_prompts_func, batched=True)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Define training arguments
args = TrainingArguments(
    output_dir="./out",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_checkpointing=True,
    logging_steps=2,
    save_strategy="steps",
    save_steps=300,
    bf16=True,
    tf32=True,
    learning_rate=1e-4,
    max_grad_norm=0.1,
    warmup_ratio=0.00,
    lr_scheduler_type="cosine",
    push_to_hub=False,
)

max_seq_length = 2048


# Custom Trainer class that swaps in the SophiaG optimizer
class CustomTrainer(SFTTrainer):
    def create_optimizer(self):
        # Override to use SophiaG instead of the default optimizer
        self.optimizer = SophiaG(
            self.model.parameters(),
            lr=self.args.learning_rate,
            betas=(0.965, 0.99),
            rho=0.01,
            weight_decay=0.1,
        )
        return self.optimizer


# Trainer configuration
trainer = CustomTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    dataset_text_field="text",  # use the formatted column produced by formatting_prompts_func
    packing=False,
)

# Start training
trainer.train()

# Save model
trainer.save_model()
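
# ------------------------------------------------------------------
# Optional sanity check (a minimal sketch, not part of the training
# flow above): generate one completion with the fine-tuned model
# still in memory, using the same Alpaca-style template. The
# instruction and input below are hypothetical placeholders; swap in
# a real example from your data.
# ------------------------------------------------------------------
model.eval()
sample_prompt = alpaca_prompt.format(
    "Summarize the following text in one sentence.",  # hypothetical instruction
    "The quick brown fox jumps over the lazy dog.",   # hypothetical input
    "",                                               # leave the response blank for generation
)
sample_inputs = tokenizer(sample_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    generated = model.generate(**sample_inputs, max_new_tokens=128)
# Print only the newly generated tokens (skip the prompt portion)
print(tokenizer.decode(generated[0][sample_inputs["input_ids"].shape[1]:], skip_special_tokens=True))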