from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, PeftModel, PeftConfig
from datasets import load_dataset
import bitsandbytes as bnb
import transformers
import torch.nn as nn
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the base model in half precision and the matching BLOOM tokenizer
model = AutoModelForCausalLM.from_pretrained(
    'bigscience/bloom-3b',
    torch_dtype=torch.float16,
    device_map='auto'
)

tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

# Inspect the module layout (helps identify LoRA target modules)
print(model)

# Freeze the base model; keep 1-D params (layer norms, biases) in fp32 for training stability
for param in model.parameters():
    param.requires_grad = False
    if param.ndim == 1:
        param.data = param.data.to(torch.float32)

# Trade compute for memory, and make inputs require grads so checkpointing works with frozen weights
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

# Compute the LM head output (and hence the loss) in fp32
class CastOutputToFloat(nn.Sequential):
    def forward(self, x):
        return super().forward(x).to(torch.float32)

model.lm_head = CastOutputToFloat(model.lm_head)

def print_trainable_parameters(model):
    """Report how many parameters the LoRA setup will actually update."""
    trainable_params = 0
    all_params = 0
    for _, param in model.named_parameters():
        all_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f'Trainable params: {trainable_params}, all params: {all_params}, trainable %: {100 * trainable_params / all_params:.4f}')

# Rank-8 LoRA adapters on BLOOM's fused attention projection
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['query_key_value'],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model=model, peft_config=config)
print_trainable_parameters(model=model)

# Load SQuAD v2 and turn each example into a single prompt/answer training string
qa_dataset = load_dataset('squad_v2')

def create_prompt(context, question, answer):
    if len(answer['text']) < 1:
        result = "I don't know the answer"
    else:
        result = answer['text'][0]
    prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{result}</s>"
    return prompt_template

mapped_dataset = qa_dataset.map(lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers'])))

trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_dataset['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=100,  # max_steps takes precedence over num_train_epochs
        num_train_epochs=3,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# The KV cache is incompatible with gradient checkpointing, so disable it during training
model.config.use_cache = False
trainer.train()

model_name = "bloom3b_finetune_sample"  # base model above is bloom-3b
HUGGING_FACE_USER_NAME = "james92"

# Pushes only the LoRA adapter weights, not the full base model
model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}", use_auth_token=True)
print("Model is saved in hggingface") |