# coding=utf-8
# Original scripts:
# https://gist.github.com/SunMarc/dcdb499ac16d355a8f265aa497645996
# and
# https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da
# Changed by webbigdata for use_safetensors.
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from dataclasses import dataclass, field
from typing import Optional

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GPTQConfig,
    HfArgumentParser,
    TrainingArguments,
)
from trl import SFTTrainer

# This example fine-tunes a Llama 2 model using GPTQ and PEFT
# (this copy trains on a local jawiki3.csv instead of the Guanaco dataset).
# Use it by passing the --model_name argument when running the script.
# The default model in the original script is ybelkada/llama-7b-GPTQ-test;
# this copy defaults to a local path ("./").
# Versions used:
# accelerate == 0.21.0
# auto-gptq == 0.4.2
# trl == 0.4.7
# peft from source
# transformers from source
# optimum from source

# For models that have `config.pretraining_tp > 1` install:
# pip install git+https://github.com/huggingface/transformers.git


@dataclass
class ScriptArguments:
    """
    These arguments vary depending on how many GPUs you have, what their capacity
    and features are, and what size model you want to train.
    """

    local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-GPU."})
    per_device_train_batch_size: Optional[int] = field(default=4)
    per_device_eval_batch_size: Optional[int] = field(default=1)
    gradient_accumulation_steps: Optional[int] = field(default=4)
    learning_rate: Optional[float] = field(default=2e-4)
    max_grad_norm: Optional[float] = field(default=0.3)
    weight_decay: Optional[float] = field(default=0.001)
    lora_alpha: Optional[int] = field(default=16)
    lora_dropout: Optional[float] = field(default=0.1)
    lora_r: Optional[int] = field(default=64)
    max_seq_length: Optional[int] = field(default=512)
    model_name: Optional[str] = field(
        default="./",
        metadata={
            "help": "The model that you want to train from the Hugging Face hub. E.g. gpt2, gpt2-xl, bert, etc."
        },
    )
    dataset_name: Optional[str] = field(
        default="timdettmers/openassistant-guanaco",
        metadata={"help": "The dataset to use."},
    )
    num_train_epochs: Optional[int] = field(
        default=1,
        metadata={"help": "The number of training epochs."},
    )
    fp16: Optional[bool] = field(
        default=False,
        metadata={"help": "Enables fp16 training."},
    )
    bf16: Optional[bool] = field(
        default=False,
        metadata={"help": "Enables bf16 training."},
    )
    packing: Optional[bool] = field(
        default=False,
        metadata={"help": "Use packing when creating the dataset."},
    )
    gradient_checkpointing: Optional[bool] = field(
        default=True,
        metadata={"help": "Enables gradient checkpointing."},
    )
    optim: Optional[str] = field(
        default="adamw_hf",
        metadata={"help": "The optimizer to use."},
    )
    lr_scheduler_type: str = field(
        default="constant",
        metadata={
            "help": "Learning rate schedule. Constant is a bit better than cosine, and has an advantage for analysis."
        },
    )
    max_steps: int = field(default=10000, metadata={"help": "How many optimizer update steps to take."})
    warmup_ratio: float = field(default=0.03, metadata={"help": "Fraction of steps to do a warmup for."})
    group_by_length: bool = field(
        default=True,
        metadata={
            "help": "Group sequences into batches with the same length. Saves memory and speeds up training considerably."
        },
    )
    save_steps: int = field(default=10, metadata={"help": "Save a checkpoint every X update steps."})
    logging_steps: int = field(default=10, metadata={"help": "Log every X update steps."})
    merge_and_push: Optional[bool] = field(
        default=False,
        metadata={"help": "Merge and push weights after training."},
    )
    output_dir: str = field(
        default="./results",
        metadata={"help": "The output directory where the model predictions and checkpoints will be written."},
    )


parser = HfArgumentParser(ScriptArguments)
script_args = parser.parse_args_into_dataclasses()[0]


def create_and_prepare_model(args):
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16, you can accelerate training with the argument --bf16")
        print("=" * 80)

    # Load the entire model on GPU 0:
    # device_map = {"": 0}
    # Switch to `device_map = "auto"` for multi-GPU.
    device_map = "auto"

    # The exllama kernel needs to be disabled because it is not very stable for training.
    model = AutoModelForCausalLM.from_pretrained(
        args.model_name,
        device_map=device_map,
        use_safetensors=True,
        local_files_only=True,
        quantization_config=GPTQConfig(bits=4, disable_exllama=True),
    )

    # Check: https://github.com/huggingface/transformers/pull/24906
    model.config.pretraining_tp = 1

    peft_config = LoraConfig(
        lora_alpha=script_args.lora_alpha,
        lora_dropout=script_args.lora_dropout,
        r=script_args.lora_r,
        bias="none",
        task_type="CAUSAL_LM",
    )

    tokenizer = AutoTokenizer.from_pretrained(script_args.model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    return model, peft_config, tokenizer


training_arguments = TrainingArguments(
    output_dir=script_args.output_dir,
    per_device_train_batch_size=script_args.per_device_train_batch_size,
    gradient_accumulation_steps=script_args.gradient_accumulation_steps,
    optim=script_args.optim,
    save_steps=script_args.save_steps,
    logging_steps=script_args.logging_steps,
    learning_rate=script_args.learning_rate,
    fp16=script_args.fp16,
    bf16=script_args.bf16,
    max_grad_norm=script_args.max_grad_norm,
    max_steps=script_args.max_steps,
    warmup_ratio=script_args.warmup_ratio,
    group_by_length=script_args.group_by_length,
    lr_scheduler_type=script_args.lr_scheduler_type,
)

model, peft_config, tokenizer = create_and_prepare_model(script_args)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.config.use_cache = False

# Note: this script trains on a local CSV (jawiki3.csv) and ignores script_args.dataset_name.
dataset = load_dataset("csv", data_files="jawiki3.csv", split="train")

# Fix weird overflow issue with fp16 training.
tokenizer.padding_side = "right"

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="QuestionAnswer",
    max_seq_length=script_args.max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=script_args.packing,
)

trainer.train()

if script_args.merge_and_push:
    output_dir = os.path.join(script_args.output_dir, "final_checkpoints")
    trainer.model.save_pretrained(output_dir)

    # Free memory for merging weights.
    del model
    torch.cuda.empty_cache()
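
    # --- Sketch (assumption, not part of the original script) -----------------
    # The branch above saves the LoRA adapter and frees GPU memory "for merging
    # weights", but never actually merges. One way to finish the job is sketched
    # below: reload a full-precision copy of the base model and fold the adapter
    # into it with PeftModel / merge_and_unload(). Merging directly into
    # GPTQ-quantized weights is not supported, so `base_model_name` is a
    # hypothetical path to an unquantized checkpoint of the same model, not
    # something this script defines.
    from peft import PeftModel

    base_model_name = script_args.model_name  # assumption: point this at the unquantized base model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    merged_model = PeftModel.from_pretrained(base_model, output_dir)
    merged_model = merged_model.merge_and_unload()  # fold the LoRA weights into the base weights

    output_merged_dir = os.path.join(script_args.output_dir, "final_merged_checkpoint")
    merged_model.save_pretrained(output_merged_dir, safe_serialization=True)
    tokenizer.save_pretrained(output_merged_dir)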