# File size: 5,070 Bytes | c2552ff
import time

import evaluate
import pandas as pd
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from transformers import BitsAndBytesConfig, DataCollatorForLanguageModeling
# 4-bit quantization settings for loading the base model:
# NF4 quantization type, double quantization enabled, float16 compute dtype.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quantization_options)
# Load the tokenizer and the 4-bit quantized base model.
model_name = "meta-llama/Llama-2-7b-chat-hf"
# `token=True` reuses the locally stored Hugging Face login; it replaces the
# deprecated `use_auth_token` argument.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
# The tokenizer has no usable padding token configured here, so reuse the
# end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    token=True,
)
print(f"Model {model_name} loaded and quantized in 4 bits")
# Load the counseling dataset from the Hugging Face Hub.
# `token=True` reuses the stored Hub login; the older `use_auth_token`
# keyword is no longer accepted by recent `datasets` releases.
dataset = load_dataset("andreshere/counsel_chat", token=True)
print("Dataset loaded")
# Configure LoRA adapters on the attention projection layers.
lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.1,
    # Declare the task so PEFT wraps the model with its causal-LM adapter class.
    task_type="CAUSAL_LM",
)
# PEFT's preparation step for k-bit quantized base models; it must run on the
# quantized model before the adapters are attached.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
# Build causal-LM training examples from the raw Context/Response columns
def preprocess_function(examples):
    """Tokenize a batch of Context/Response pairs for causal-LM fine-tuning.

    Each context is joined with its response (and a closing EOS token) into a
    single sequence. A causal LM needs ``labels`` that line up token-for-token
    with ``input_ids``; tokenizing the two columns separately would produce
    labels unrelated to the inputs and the response text would never be
    trained on.

    Args:
        examples: Batch mapping holding equally long ``'Context'`` and
            ``'Response'`` lists, as passed by ``Dataset.map(batched=True)``.

    Returns:
        The tokenizer output (truncated/padded to 512 tokens) with an added
        ``labels`` entry, plus the raw ``Context`` and ``Response`` values.
        Sequences longer than 512 tokens are cut, so a very long context can
        crowd out part or all of its response.
    """
    inputs = examples['Context']
    targets = examples['Response']
    # Missing text (None) is treated as an empty string so one bad row does
    # not abort tokenization of the whole batch.
    sequences = [
        f"{context or ''}\n{response or ''}{tokenizer.eos_token}"
        for context, response in zip(inputs, targets)
    ]
    model_inputs = tokenizer(sequences, max_length=512, truncation=True, padding='max_length')
    # Labels mirror input_ids; padded positions get -100 so the loss skips them.
    model_inputs["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    model_inputs["Context"] = inputs
    model_inputs["Response"] = targets
    return model_inputs
# Tokenize every split in batches, then pick the splits used for training
# and for evaluation.
tokenized_dataset = dataset.map(preprocess_function, batched=True)
train_dataset, eval_dataset = (tokenized_dataset[split] for split in ('train', 'test'))
# mlm=False: collate for language modeling without masked-LM token masking.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
# Prepare training arguments
training_args = TrainingArguments(
    output_dir='./',
    do_train=True,
    do_eval=True,
    # Evaluate once per epoch; a step interval (`eval_steps`) would be ignored
    # with this strategy, so none is set.
    eval_strategy='epoch',
    gradient_accumulation_steps=2,
    auto_find_batch_size=True,
    weight_decay=0.01,
    num_train_epochs=4,
    learning_rate=1e-4,
    logging_dir='./logs',
    logging_strategy="steps",
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
    save_safetensors=True,
    report_to="tensorboard",
    # `hub_strategy` only takes effect when pushing is enabled, and the target
    # repository is selected through `hub_model_id`.
    push_to_hub=True,
    hub_model_id="andreshere/llama-2-7b-mental-health-counseler",
    hub_strategy="every_save",
)
# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)
# Train the model
trainer.train()
# Save the trained model to `output_dir`
trainer.save_model()
# Push to the Hugging Face Hub. The repository comes from `hub_model_id`;
# the first positional argument of `push_to_hub` is the commit message, so the
# repository id must not be passed here.
trainer.push_to_hub()
# Load the four text-generation metrics used by the evaluation step.
rouge, bleu, bertscore, meteor = (
    evaluate.load(metric_name)
    for metric_name in ('rouge', 'bleu', 'bertscore', 'meteor')
)
# Function to compute metrics and log per-example responses
def compute_metrics_and_log_time(pred):
    """Score evaluation predictions and write a per-example response log.

    The predictions are teacher-forced next-token choices (arg-max over the
    logits), not free-running generations.

    Args:
        pred: Object exposing ``predictions`` (raw logits of shape
            batch x sequence x vocabulary, or already arg-maxed token ids) and
            ``label_ids`` (token ids with ``-100`` at ignored positions).

    Returns:
        dict with the keys ``"rouge"``, ``"bleu"``, ``"bertscore"`` and
        ``"meteor"``, each holding the raw result of that metric.

    Side effects:
        Writes ``response_log.csv`` with one row per evaluated example.
    """
    predictions = pred.predictions
    if isinstance(predictions, tuple):
        # Some models return extra outputs next to the logits; logits come first.
        predictions = predictions[0]
    if predictions.ndim == 3:
        # Raw logits: reduce the vocabulary axis to the most likely token id.
        predictions = predictions.argmax(axis=-1)

    # In a causal LM the output at position t predicts token t + 1, so drop
    # the last prediction and the first label to line the two up.
    pred_ids = predictions[:, :-1].copy()
    labels_ids = pred.label_ids[:, 1:].copy()

    # -100 marks ignored positions and cannot be decoded; blank those positions
    # in both arrays with the pad token, which decoding then strips.
    pad_id = tokenizer.pad_token_id
    ignored = labels_ids < 0
    pred_ids[ignored] = pad_id
    pred_ids[pred_ids < 0] = pad_id
    labels_ids[ignored] = pad_id

    decode_start = time.perf_counter()
    decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    decode_seconds = time.perf_counter() - decode_start
    decoded_labels = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    # Compute metrics
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    bleu_result = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    # BERTScore needs a language (or an explicit model type) to pick its model.
    bertscore_result = bertscore.compute(
        predictions=decoded_preds, references=decoded_labels, lang="en"
    )
    meteor_result = meteor.compute(predictions=decoded_preds, references=decoded_labels)
    result = {
        "rouge": rouge_result,
        "bleu": bleu_result,
        "bertscore": bertscore_result,
        "meteor": meteor_result,
    }

    # The prediction object carries no raw text, so take the contexts from the
    # evaluation split. NOTE(review): assumes evaluation runs over
    # `eval_dataset` in its stored order; otherwise the column is left empty.
    contexts = list(eval_dataset['Context'])
    if len(contexts) != len(decoded_preds):
        contexts = [None] * len(decoded_preds)

    # Model inference time is not observable inside this callback; record the
    # decoding time amortized over the examples instead.
    example_count = len(decoded_preds)
    per_example_seconds = decode_seconds / example_count if example_count else 0.0

    # Save the log to a DataFrame
    log_df = pd.DataFrame({
        "Context": contexts,
        "Original Response": decoded_labels,
        "Model Response": decoded_preds,
        "Compute Time": [per_example_seconds] * example_count,
    })
    log_df.to_csv("response_log.csv", index=False)
    return result
# Evaluate model
def _logits_to_token_ids(logits, labels):
    """Reduce per-step logits to arg-max token ids before they are cached."""
    if isinstance(logits, tuple):
        # Extra model outputs may follow the logits; the logits come first.
        logits = logits[0]
    return logits.argmax(dim=-1)


# `Trainer.evaluate()` takes no `compute_metrics` argument; the metric function
# is a trainer attribute, so attach it before evaluating.
trainer.compute_metrics = compute_metrics_and_log_time
# Keep only token ids per step so full-vocabulary logits for the whole
# evaluation set are not accumulated in memory.
trainer.preprocess_logits_for_metrics = _logits_to_token_ids
metrics = trainer.evaluate(eval_dataset=eval_dataset, metric_key_prefix="eval")
print(metrics)
# Export metrics to CSV. Metric results may be nested dicts; `json_normalize`
# flattens them into dotted column names on a single row.
metrics_df = pd.json_normalize(metrics)
metrics_df.to_csv("fine-tuned-metrics.csv", index=False)
|