# Log in to the Hugging Face Hub (required to download the gated Llama-2 weights)
from huggingface_hub import notebook_login
notebook_login()

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # pin the run to GPU 0

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
dataset_name = "dataset.json"
dataset = load_dataset('json', data_files=dataset_name, split="train")
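# Note on the expected data layout (an assumption, since the file itself isn't
# shown): SFTTrainer below reads the "text" column (dataset_text_field="text"),
# so dataset.json should be a JSON array or JSON Lines file where every record
# has a "text" key, e.g. in the Llama-2 chat format (hypothetical example):
#   {"text": "<s>[INST] What is QLoRA? [/INST] A 4-bit fine-tuning method. </s>"}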
output_dir = "./results"
base_model_name = "Llama-2-13b-chat-hf"  # local path or Hub id of the base checkpoint
trained_model_name = os.path.join(output_dir, "final_checkpoint")
model_start = base_model_name
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                       # QLoRA: quantize the base weights to 4-bit
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,   # match the bf16=True training setting below
    bnb_4bit_use_double_quant=False,
)
device_map = {"": 0}  # place the whole model on GPU 0

base_model = AutoModelForCausalLM.from_pretrained(
    model_start,
    quantization_config=bnb_config,
    device_map=device_map
)
base_model.config.use_cache = False  # caching is incompatible with gradient checkpointing during training
# More info: https://github.com/huggingface/transformers/pull/24906
base_model.config.pretraining_tp = 1
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,                    # LoRA rank
    bias="none",
    task_type="CAUSAL_LM",
)
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Llama-2 has no pad token; reuse EOS
tokenizer.padding_side = "right"           # right-padding is the safe default for SFT
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    num_train_epochs=10,
    max_grad_norm=0.3,
    weight_decay=0.001,
    lr_scheduler_type="constant",
    max_steps=-1,               # -1 defers to num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,       # bucket samples of similar length to reduce padding
    fp16=False,
    bf16=True,                  # requires an Ampere-or-newer GPU
    optim='paged_adamw_32bit',  # paged optimizer, pairs well with 4-bit QLoRA
    logging_steps=25
)
max_seq_length = 24  # note: samples are truncated to 24 tokens; raise this if your "text" entries are longer
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",   # column of dataset.json that holds the training text
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_args,
    packing=False
)
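# Optional sanity check before training (a sketch, not part of the original
# script): when peft_config is passed, SFTTrainer wraps the model in a
# PeftModel, which exposes print_trainable_parameters(); only the LoRA
# adapters, a small fraction of the 13B weights, should be trainable.
trainer.model.print_trainable_parameters()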
trainer.train()
# Save only the LoRA adapter weights
output_dir = trained_model_name  # ./results/final_checkpoint
trainer.model.save_pretrained(output_dir)
# Reload the base model in fp16 (unquantized) and merge the adapter into it
base_model = AutoModelForCausalLM.from_pretrained(
    model_start,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, output_dir)
model = model.merge_and_unload()  # fold the LoRA weights into the base layers
model.save_pretrained(output_dir)
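# Quick smoke test of the merged checkpoint (a sketch, not part of the original
# script). Saving the tokenizer makes output_dir self-contained; the prompt is a
# hypothetical example in the Llama-2 instruction format.
tokenizer.save_pretrained(output_dir)

merged_model = AutoModelForCausalLM.from_pretrained(
    output_dir,
    torch_dtype=torch.float16,
    device_map=device_map,
)
prompt = "[INST] What is QLoRA? [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(merged_model.device)
with torch.no_grad():
    output_ids = merged_model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))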