import time

import torch
import numpy as np
import pandas as pd
import evaluate
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Quantization configuration: NF4 4-bit weights with double quantization,
# computing in float16 to fit the 7B model in limited GPU memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

# Load model and tokenizer. Llama 2 is gated, so an authenticated HF login is
# required; `token=True` replaces the deprecated `use_auth_token=True`.
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)

# Llama has no pad token by default, so reuse the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    token=True
)

print(f"Model {model_name} loaded and quantized in 4 bits")

# Load dataset (token=True again in place of the deprecated use_auth_token)
dataset = load_dataset("andreshere/counsel_chat", token=True)
print("Dataset loaded")

# Prepare the quantized model for k-bit training, then configure LoRA.
# task_type="CAUSAL_LM" tells PEFT which model head to wrap.
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
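
# Show how few parameters LoRA actually trains relative to the full model
model.print_trainable_parameters()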

# Tokenize the dataset. For causal LM fine-tuning the prompt (Context) and the
# answer (Response) are concatenated into one sequence; the data collator below
# derives the labels from the input ids, so no separate label field is needed.
# (Tokenizing Context and Response independently, as before, would feed the
# model labels that do not line up with its inputs.)
def preprocess_function(examples):
    texts = [
        f"{context}\n{response}{tokenizer.eos_token}"
        for context, response in zip(examples['Context'], examples['Response'])
    ]
    return tokenizer(texts, max_length=512, truncation=True, padding='max_length')

tokenized_dataset = dataset.map(preprocess_function, batched=True)

train_dataset = tokenized_dataset['train']
eval_dataset = tokenized_dataset['test']

# With mlm=False the collator does standard causal LM collation: it copies
# input_ids into labels and masks padding positions with -100 so the loss
# ignores them
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Prepare training arguments. eval_steps is dropped because it has no effect
# when eval_strategy='epoch'; pushing to the Hub is configured here so that
# hub_strategy applies and trainer.push_to_hub() knows the target repo.
training_args = TrainingArguments(
    output_dir='./',
    do_train=True,
    do_eval=True,
    eval_strategy='epoch',  # renamed from 'evaluation_strategy' in recent transformers releases
    gradient_accumulation_steps=2,
    auto_find_batch_size=True,
    weight_decay=0.01,
    num_train_epochs=4,
    learning_rate=1e-4,
    logging_dir='./logs',
    logging_strategy="steps",
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
    save_safetensors=True,
    report_to="tensorboard",
    push_to_hub=True,
    hub_model_id="andreshere/llama-2-7b-mental-health-counseler",
    hub_strategy="every_save"
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator
)

# Train the model
trainer.train()
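
# Optional qualitative check (a minimal sketch, not part of the original
# script): generate a reply for one held-out context to eyeball the
# fine-tuned model before pushing it to the Hub.
sample_context = dataset['test'][0]['Context']
sample_inputs = tokenizer(sample_context, return_tensors="pt").to(model.device)
with torch.no_grad():
    sample_output = model.generate(**sample_inputs, max_new_tokens=128)
print(tokenizer.decode(sample_output[0], skip_special_tokens=True))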

# Save the fine-tuned adapter locally (with PEFT, save_model stores the
# LoRA weights rather than the full base model)
trainer.save_model()
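# Also persist the tokenizer alongside the adapter so the checkpoint loads standalone
tokenizer.save_pretrained(training_args.output_dir)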

# Push to the Hugging Face Hub. Trainer.push_to_hub() takes an optional commit
# message, not a repo id; the destination is hub_model_id set above.
trainer.push_to_hub()

# Load evaluation metrics
rouge = evaluate.load('rouge')
bleu = evaluate.load('bleu')
bertscore = evaluate.load('bertscore')
meteor = evaluate.load('meteor')

# Function to compute metrics and log model responses. Per-response generation
# time is not observable here (predictions arrive already computed), so the
# wall-clock time of the metric computation is logged instead.
def compute_metrics_and_log_time(pred):
    start_time = time.time()

    labels_ids = pred.label_ids
    pred_ids = pred.predictions

    # The collator masks padding with -100, which the tokenizer cannot decode;
    # map those positions back to the pad token id first
    labels_ids = np.where(labels_ids != -100, labels_ids, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    # Compute metrics (bertscore requires a language or model_type argument)
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    bleu_result = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    bertscore_result = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")
    meteor_result = meteor.compute(predictions=decoded_preds, references=decoded_labels)

    # Flatten everything to scalars so the Trainer can log and export the values
    result = {
        "rouge1": rouge_result["rouge1"],
        "rouge2": rouge_result["rouge2"],
        "rougeL": rouge_result["rougeL"],
        "bleu": bleu_result["bleu"],
        "bertscore_f1": float(np.mean(bertscore_result["f1"])),
        "meteor": meteor_result["meteor"],
        "metrics_compute_time": time.time() - start_time,
    }

    # Save a per-example log; the raw Context column survives tokenization
    # because datasets.map keeps the original columns
    log_df = pd.DataFrame({
        "Context": eval_dataset["Context"],
        "Original Response": decoded_labels,
        "Model Response": decoded_preds,
    })
    log_df.to_csv("response_log.csv", index=False)

    return result

# Evaluate the model. trainer.evaluate() does not accept a compute_metrics
# argument, so attach it to the trainer first, along with an argmax over the
# logits so compute_metrics receives token ids instead of raw logits.
trainer.compute_metrics = compute_metrics_and_log_time
trainer.preprocess_logits_for_metrics = lambda logits, labels: logits.argmax(dim=-1)
metrics = trainer.evaluate(eval_dataset=eval_dataset, metric_key_prefix="eval")
print(metrics)

# Export metrics to CSV
metrics_df = pd.DataFrame(metrics, index=[0])
metrics_df.to_csv("fine-tuned-metrics.csv", index=False)