# Llama2_jailbreak/main.py
# Authenticate with the Hugging Face Hub (the Llama-2 checkpoints are gated there).
# login() is the script-friendly variant; notebook_login() only works inside a notebook.
from huggingface_hub import login
login()

import os

# Pin the run to GPU 0 before torch is imported.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# Local JSON file with the fine-tuning examples, loaded as a single "train" split.
dataset_name = "dataset.json"
dataset = load_dataset('json', data_files=dataset_name, split="train")
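# Added sketch of the record layout this load assumes: the trainer below reads
# dataset_text_field="text", so each example in dataset.json is expected to carry a
# single "text" field. The content shown here is purely illustrative, not the actual data:
#
#   [
#     {"text": "<s>[INST] example instruction [/INST] example response </s>"},
#     {"text": "..."}
#   ]
#
# A quick, optional sanity check before training:
#   print(dataset.column_names)        # expect a 'text' column
#   print(dataset[0]["text"][:200])    # peek at the first example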
output_dir = "./results"

# Either a local checkpoint directory or the gated Hub id "meta-llama/Llama-2-13b-chat-hf".
base_model_name = "Llama-2-13b-chat-hf"
trained_model_name = os.path.join(output_dir, "final_checkpoint")
model_start = base_model_name
# Load the base model in 4-bit NF4 so the 13B checkpoint fits on a single GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype='float16',
    bnb_4bit_use_double_quant=False,
)
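# Rough memory arithmetic for the 4-bit load (added note, approximate): 13e9 parameters
# at 0.5 bytes each is about 6.5 GB of weights, before activations, the LoRA adapter,
# and optimizer state are counted.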
# Put the whole model on GPU 0.
device_map = {"": 0}
base_model = AutoModelForCausalLM.from_pretrained(
    model_start,
    quantization_config=bnb_config,
    device_map=device_map,
)
# The KV cache is only useful at inference time; disable it during training.
base_model.config.use_cache = False
# More info: https://github.com/huggingface/transformers/pull/24906
base_model.config.pretraining_tp = 1
# LoRA adapter configuration; no target_modules are listed, so peft falls back to its
# defaults for LLaMA-style models.
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
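# Added back-of-the-envelope adapter size (assumes peft's default LLaMA targets,
# q_proj and v_proj, and the 13B shape of 40 layers with hidden size 5120): each targeted
# 5120x5120 projection adds r * (d_in + d_out) = 64 * 10240 ≈ 0.66M parameters, so
# 2 projections * 40 layers ≈ 52M trainable parameters, well under 1% of the base model.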
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# Llama-2 ships no dedicated pad token; reuse EOS and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
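# Added illustration (optional): with the settings above, shorter sequences in a batch are
# padded on the right with the EOS id, and the attention mask marks those positions with 0.
#   batch = tokenizer(["Hi", "A longer example sentence"], padding=True, return_tensors="pt")
#   print(batch["input_ids"])
#   print(batch["attention_mask"])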
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    num_train_epochs=10,
    max_grad_norm=0.3,
    weight_decay=0.001,
    lr_scheduler_type="constant",
    max_steps=-1,             # -1 defers to num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,     # batch similar-length samples to cut padding
    fp16=False,
    bf16=True,                # bf16 training; note bnb_4bit_compute_dtype above is 'float16'
    optim='paged_adamw_32bit',
    logging_steps=25,
)
# Hard cap on tokenized length; anything longer than 24 tokens is truncated by the trainer.
max_seq_length = 24
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    peft_config=peft_config,   # SFTTrainer attaches the LoRA adapter itself
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
)
trainer.train()
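# Added diagnostic (optional sketch): the Trainer records its logged metrics in
# trainer.state.log_history, so the tail of that list gives a quick view of the final
# training loss without opening TensorBoard.
for record in trainer.state.log_history[-3:]:
    print(record)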
# Save only the LoRA adapter weights to ./results/final_checkpoint.
output_dir = os.path.join(output_dir, "final_checkpoint")
trainer.model.save_pretrained(output_dir)
# Reload the base model in fp16 (not 4-bit) so the LoRA weights can be merged back in.
base_model = AutoModelForCausalLM.from_pretrained(
    model_start,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Apply the trained adapter, fold it into the base weights, and save the standalone model.
model = PeftModel.from_pretrained(base_model, output_dir)
model = model.merge_and_unload()
model.save_pretrained(output_dir)
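# Added smoke test (sketch, not part of the original script): run one short greedy
# generation with the merged model that is still in memory. The prompt is illustrative.
prompt = "Write a one-sentence summary of what this script does."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    generated = model.generate(**inputs, max_new_tokens=64, do_sample=False)
print(tokenizer.decode(generated[0], skip_special_tokens=True))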