# fine-tuning.llama2.py
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import evaluate
import numpy as np
import pandas as pd
import time
# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)
# Load model and tokenizer
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)  # `use_auth_token` is deprecated in favor of `token`
# Llama-2 ships without a pad token, so reuse EOS for padding
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    token=True
)
print(f"Model {model_name} loaded and quantized in 4 bits")
# Load dataset
dataset = load_dataset("andreshere/counsel_chat", token=True)
print("Dataset loaded")
# Configure LoRA
lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.1,
    task_type="CAUSAL_LM"  # ensures PEFT wraps the model for causal-LM training
)
# Prepare the 4-bit model for training before attaching the adapters
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
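# Report the share of parameters the LoRA adapters make trainable.
model.print_trainable_parameters()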
# Preprocess the dataset: build one causal-LM sequence per example. The
# collator below derives labels from input_ids, so Context and Response
# must live in the same sequence for the responses to be trained on.
def preprocess_function(examples):
    # Join with a newline; the Llama-2 chat template could be used here instead
    texts = [
        f"{context}\n{response}{tokenizer.eos_token}"
        for context, response in zip(examples['Context'], examples['Response'])
    ]
    model_inputs = tokenizer(texts, max_length=512, truncation=True, padding='max_length')
    # Keep the raw columns so the evaluation log below can reference them
    model_inputs["Context"] = examples['Context']
    model_inputs["Response"] = examples['Response']
    return model_inputs
tokenized_dataset = dataset.map(preprocess_function, batched=True)
train_dataset = tokenized_dataset['train']
eval_dataset = tokenized_dataset['test']
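# Spot-check one preprocessed example: Context and Response should appear
# together in a single decoded training sequence.
print(tokenizer.decode(train_dataset[0]["input_ids"], skip_special_tokens=True)[:200])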
# Causal-LM collator: copies input_ids into labels and masks padding with -100
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)
# Prepare training arguments
training_args = TrainingArguments(
    output_dir='./',
    do_train=True,
    do_eval=True,
    eval_strategy='epoch',
    gradient_accumulation_steps=2,
    auto_find_batch_size=True,
    weight_decay=0.01,
    num_train_epochs=4,
    learning_rate=1e-4,
    logging_dir='./logs',
    logging_strategy="steps",
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
    save_safetensors=True,
    eval_steps=10,  # unused with eval_strategy='epoch'
    report_to="tensorboard",
    hub_model_id="andreshere/llama-2-7b-mental-health-counseler",  # repo that push_to_hub targets
    hub_strategy="every_save"
)
# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator
)
# Train the model
trainer.train()
# Save the trained adapter, tokenizer, and config
trainer.save_model()
# Push to the Hub repo configured via hub_model_id; the positional argument
# of push_to_hub is a commit message, not a repo id
trainer.push_to_hub()
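# Optional sanity check: generate a reply for a hypothetical prompt (the
# prompt text below is illustrative, not taken from the dataset).
sample_prompt = "I have been feeling anxious lately. What can I do?"
sample_inputs = tokenizer(sample_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    sample_output = model.generate(**sample_inputs, max_new_tokens=128)
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))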
# Load evaluation metrics
rouge = evaluate.load('rouge')
bleu = evaluate.load('bleu')
bertscore = evaluate.load('bertscore')
meteor = evaluate.load('meteor')
# Function to compute metrics and log per-example predictions
def compute_metrics_and_log_time(pred):
    labels_ids = pred.label_ids
    pred_ids = pred.predictions
    # Restore pad tokens where the collator masked labels with -100 so they decode cleanly
    labels_ids = np.where(labels_ids != -100, labels_ids, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)
    # Compute metrics, timing the whole pass (per-response generation time is
    # not observable here, since predictions come from one batched eval pass)
    start_time = time.time()
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    bleu_result = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    bertscore_result = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")  # bertscore requires a language
    meteor_result = meteor.compute(predictions=decoded_preds, references=decoded_labels)
    compute_time = time.time() - start_time
    result = {
        "rouge": rouge_result,
        "bleu": bleu_result,
        "bertscore": bertscore_result,
        "meteor": meteor_result,
    }
    # Save the log to a DataFrame; EvalPrediction has no `features` attribute,
    # so contexts are read from eval_dataset, which is in the same order
    log_df = pd.DataFrame({
        "Context": eval_dataset["Context"][:len(decoded_preds)],
        "Original Response": decoded_labels,
        "Model Response": decoded_preds,
        "Compute Time": compute_time / max(len(decoded_preds), 1),  # mean metric time per response
    })
    log_df.to_csv("response_log.csv", index=False)
    return result
# Evaluate model. Trainer.evaluate accepts no compute_metrics argument, so
# attach the metric function (and an argmax over the logits) to the trainer first.
trainer.compute_metrics = compute_metrics_and_log_time
trainer.preprocess_logits_for_metrics = lambda logits, labels: logits.argmax(dim=-1)
metrics = trainer.evaluate(eval_dataset=eval_dataset, metric_key_prefix="eval")
print(metrics)
# Export metrics to CSV (one row; nested metric dicts are kept as object cells)
metrics_df = pd.DataFrame([metrics])
metrics_df.to_csv("fine-tuned-metrics.csv", index=False)