Can you share the script used for training?

#1
by tarruda - opened

I would like to try training locally. Can you share the script used to obtain the adapter model?

Thanks

Sorry, I've been away for a few weeks, but I can share the script with you.

```python
!pip cache purge
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install -q einops
```

```python
from huggingface_hub import login, notebook_login

# Authenticate with the Hugging Face Hub; either of the two calls below is sufficient.
login(token='*******************************************')
notebook_login()
```

```python
from datasets import load_dataset

dataset = load_dataset('nampdn-ai/tiny-codes', use_auth_token=True)

# Split the training data into train/test sets (70/30)
dataset = dataset['train'].train_test_split(test_size=0.30, shuffle=True)

DEFAULT_SYSTEM_PROMPT = """
Below is an instruction that describes a task. Write a response that appropriately completes the request
""".strip()
```

```python
def generate_training_prompt(
    prompt: str, response: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    return f"""### Instruction: {system_prompt}

Input:

{prompt.strip()}

Response:

{response}
""".strip()


def generate_text(data_points):
    if 'input' in data_points and 'output' in data_points:
        return {
            'user prompt': data_points['input'],
            'user response': data_points['output'],
            'text': generate_training_prompt(data_points['input'], data_points['output'])
        }


def process_dataset(data):
    return (
        data.shuffle(seed=42)
        .map(generate_text)
        .remove_columns(
            [
                'instruction',
                'input',
                'output',
            ]
        )
    )


dataset['train'] = process_dataset(dataset['train'])
# dataset['test'] = process_dataset(dataset['test'])
```
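Not part of the original notebook, but a quick way to sanity-check the prompt template after `process_dataset` runs is to print one formatted example:

```python
# Sanity check (not in the original script): inspect one formatted training example
# to confirm the instruction/input/response template looks as expected.
print(dataset['train'][0]['text'][:500])
```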

```python
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

# Set the environment variable for CUDA visible devices
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Load the model configuration
config = AutoConfig.from_pretrained(model_id)
config.naive_attention_prefill = True
```

```python
# Quantization settings: load the model in 4-bit NF4 with double quantization and bfloat16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
    config=config  # Pass the updated configuration here
)
```

```python
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

from peft import prepare_model_for_kbit_training

# Enable gradient checkpointing and prepare the quantized model for k-bit (QLoRA) training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
```

```python
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
```

```python
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)
```

```python
# The processed train split already contains the formatted prompt in the 'text' column
example = dataset['train'][0]['text']

# Tokenize the formatted prompts for training
dataset = dataset['train'].map(lambda example: tokenizer(example['text']), batched=True)
```

```python
import transformers

trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
```
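Since the question was about obtaining the adapter model, here is a minimal sketch of how you could save the LoRA adapter after training and reload it for inference. This is not from the original notebook: the directory name `mistral-tiny-codes-adapter`, the example prompt, and the generation settings are placeholders you would adjust to your setup.

```python
# Minimal sketch (not from the original notebook): persist the LoRA adapter and
# reload it on top of the quantized base model for inference.
from peft import PeftModel

adapter_dir = "mistral-tiny-codes-adapter"  # placeholder path
model.save_pretrained(adapter_dir)          # saves only the adapter weights and config
tokenizer.save_pretrained(adapter_dir)
# model.push_to_hub("your-username/your-adapter-repo")  # optional, placeholder repo id

# Later, or in a fresh session: load the base model again and attach the adapter.
base = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map='auto',
    quantization_config=bnb_config,
)
inference_model = PeftModel.from_pretrained(base, adapter_dir)
inference_model.config.use_cache = True  # re-enable the cache for generation

prompt = "### Instruction: Write a Python function that reverses a string.\n\nResponse:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(inference_model.device)
outputs = inference_model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```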

Thanks. Can you wrap it in markdown code blocks (```)? It won't work the way it is currently formatted.
