# Phi2-Fine-Tuning/training_phi2.py
# QLoRA fine-tuning of a merged TinyLlama checkpoint on a JSONL log dataset.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from transformers import EarlyStoppingCallback
from peft import LoraConfig
from trl import SFTTrainer
from datasets import load_dataset
import os
# Paths: base model (a previously merged checkpoint), training data, and checkpoint directory.
NAME_OF_MODEL = "./merged_tinyllama_logger"
DATASET_PATH = "/app/data/log_dataset.jsonl"
OUTPUT_DIR = "/app/model_output/incremental_1_logs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Quantization configuration: load the base model in 4-bit NF4 with double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
# LoRA configuration: rank-32 adapters with fairly strong dropout regularization.
lora_config = LoraConfig(
    r=32,
    lora_alpha=124,
    bias="none",
    lora_dropout=0.15,
    task_type="CAUSAL_LM",
)
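# Note: no target_modules are set, so PEFT falls back to its per-architecture defaults
# (for Llama-family models such as TinyLlama, the attention projections q_proj/v_proj).
# A broader, explicit choice might look like the following sketch; the module list is an
# illustrative assumption, not part of the original script:
# lora_config = LoraConfig(
#     r=32, lora_alpha=124, bias="none", lora_dropout=0.15, task_type="CAUSAL_LM",
#     target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
# )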
# Training arguments: fp16 training, cosine LR schedule, paged 8-bit AdamW,
# step-based evaluation and checkpointing.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    learning_rate=1e-4,
    weight_decay=0.001,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    num_train_epochs=4,
    logging_steps=10,
    save_steps=25,
    fp16=True,
    optim="paged_adamw_8bit",
    report_to=["tensorboard"],
    eval_strategy="steps",
    eval_steps=25,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)
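# Effective batch size: per_device_train_batch_size (4) x gradient_accumulation_steps (16)
# = 64 samples per optimizer step per device. save_steps matches eval_steps (25), so every
# checkpoint has an eval_loss, which load_best_model_at_end needs to select the best one.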
# Load the JSONL dataset and hold out 10% for evaluation.
try:
    dataset = load_dataset("json", data_files=DATASET_PATH)
    split_dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]
except Exception as e:
    print(f"ERROR LOADING DATASET from {DATASET_PATH}: {e}")
    exit(1)
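# Expected data layout: one JSON object per line, each with a "text" key, since the
# trainer below reads dataset_text_field="text". For example (the prompt template is
# an illustrative assumption, not taken from the dataset):
# {"text": "### Log: ...\n### Analysis: ..."}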
print("Loading model with Quantization")
try:
model=AutoModelForCausalLM.from_pretrained(
NAME_OF_MODEL,
quantization_config = bnb_config,
device_map="auto",
trust_remote_code = True,
torch_dtype = torch.float16
)
model.config.pretraining_p=1
print("Model loaded successfully")
except Exception as e:
print("ERROR LOADING MODEL: {e}")
exit(1)
try:
    tokenizer = AutoTokenizer.from_pretrained(NAME_OF_MODEL, trust_remote_code=True)
    # Reuse EOS as the pad token and right-pad, as is standard for causal-LM fine-tuning.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
except Exception as e:
    print(f"ERROR LOADING TOKENIZER: {e}")
    exit(1)
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=7)],
)
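# Optional sanity check (assumes trainer.model is the PEFT-wrapped model, which
# SFTTrainer creates when a peft_config is passed): report adapter vs. total parameters.
trainer.model.print_trainable_parameters()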
print("training started")
trainer.train()
print("fine tuning complete")
trainer.save_model(OUTPUT_DIR)
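# Possible follow-up, not in the original script: persist the tokenizer next to the
# adapter so OUTPUT_DIR is self-contained for later loading.
tokenizer.save_pretrained(OUTPUT_DIR)
# Merging the LoRA weights back into the base model (peft's merge_and_unload) would
# require reloading the base model without 4-bit quantization first, roughly:
# merged = PeftModel.from_pretrained(base_model_fp16, OUTPUT_DIR).merge_and_unload()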