# -*- coding: utf-8 -*-
"""LLAMA_Fine-Tuning.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1C-kNPOgPiCC9ybxVKhOkWB9ts53APbOb
# Fine-tune Llama 3 in Google Colab
"""
# Install pinned library versions plus datasets, then upgrade everything to the
# latest releases (the upgrade supersedes the pinned versions).
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
!pip install datasets
!pip install --upgrade accelerate peft bitsandbytes transformers trl
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# The model that you want to train from the Hugging Face hub
model_name = "unsloth/llama-3-8b-bnb-4bit"
# The Hugging Face token
token_name = "XXXX"
# Fine-tuned model name
new_model = "llama3_python_TFG"
################################################################################
# QLoRA parameters
################################################################################
# LoRA attention dimension
lora_r = 64
# Alpha parameter for LoRA scaling
lora_alpha = 16
# Dropout probability for LoRA layers
lora_dropout = 0.1
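# A minimal sketch of how the three values above would typically be collected
# into a peft LoraConfig. The bias and task_type settings are assumptions for
# illustration, not taken from the original notebook; the object is defined
# here for reference and is not consumed by the plain Trainer used below.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)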
################################################################################
# bitsandbytes parameters
################################################################################
# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False
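# A minimal sketch of how the 4-bit flags above map onto a transformers
# BitsAndBytesConfig. The base checkpoint used here
# ("unsloth/llama-3-8b-bnb-4bit") is already pre-quantized, so this config is
# illustrative and is not passed to from_pretrained() below.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)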
################################################################################
# TrainingArguments parameters
################################################################################
# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"
# Number of training epochs
num_train_epochs = 1
# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False
# Batch size per GPU for training
per_device_train_batch_size = 4
# Batch size per GPU for evaluation
per_device_eval_batch_size = 4
# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 2
# Enable gradient checkpointing
gradient_checkpointing = True
# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3
# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4
# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001
# Optimizer to use
optim = "paged_adamw_32bit"
# Learning rate schedule
lr_scheduler_type = "cosine"
# Number of training steps (overrides num_train_epochs)
max_steps = -1
# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03
# Group sequences into batches with the same length
# Saves memory and speeds up training considerably
group_by_length = True
# Save a checkpoint every X update steps
save_steps = 0
# Log every X update steps
logging_steps = 25
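# A minimal sketch showing how the variables above would typically be wired
# into TrainingArguments. The TrainingArguments actually used for training are
# built further below with hard-coded values; this object is for reference
# only and is never passed to a trainer.
reference_training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
)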
################################################################################
# SFT parameters
################################################################################
# Maximum sequence length to use
max_seq_length = None
# Pack multiple short examples in the same input sequence to increase efficiency
packing = False
# Load the entire model on GPU 0
device_map = {"": 0}
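# A minimal, hypothetical helper (never called) showing how the pieces above
# would typically come together for QLoRA fine-tuning with trl's SFTTrainer,
# assuming the trl==0.4.7 API pinned earlier and a dataset with a "text"
# column. The plain Trainer used below does not consume peft_config,
# max_seq_length, packing or device_map.
def build_sft_trainer(sft_tokenizer, sft_train_dataset, sft_args):
    # For a base checkpoint that is not already pre-quantized, you would also
    # pass quantization_config=bnb_config here.
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device_map,
        token=token_name,
    )
    return SFTTrainer(
        model=base_model,
        args=sft_args,
        train_dataset=sft_train_dataset,
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        tokenizer=sft_tokenizer,
        packing=packing,
    )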
from transformers import Trainer, DataCollatorWithPadding
from datasets import Dataset
def load_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]
train_texts = load_text_file('LLAMA_DatosEntrenamiento.txt')
val_texts = load_text_file('LLAMA_DatosValidacion.txt')
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token_name)
# Llama tokenizers may ship without a padding token; fall back to EOS so that
# padding="longest" below works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
def tokenize_and_encode(texts):
    encodings = tokenizer(texts, truncation=True, padding="longest", max_length=512, return_tensors="pt")
    encodings['labels'] = encodings['input_ids'].clone()  # duplicate input_ids to use as labels
    return encodings
train_encodings = tokenize_and_encode(train_texts)
val_encodings = tokenize_and_encode(val_texts)
train_dataset = Dataset.from_dict({key: val.numpy() for key, val in train_encodings.items()})
val_dataset = Dataset.from_dict({key: val.numpy() for key, val in val_encodings.items()})
training_arguments = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",  # evaluate based on the number of steps
    eval_steps=500,               # evaluate every 500 steps
    num_train_epochs=1,
    per_device_train_batch_size=4,
    logging_steps=logging_steps,
    save_steps=1000,              # save the model every 1000 steps to reduce disk writes
    learning_rate=2e-4,
    weight_decay=0.001,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    report_to="tensorboard",
    fp16=False,                   # disable mixed precision to keep training simple
)
model = AutoModelForCausalLM.from_pretrained(model_name, token=token_name)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)
trainer.train()
model.save_pretrained(new_model)
model.push_to_hub("eibeel/llama3-python-TFG")
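# Optional quick generation check (illustrative sketch) using the `pipeline`
# helper imported at the top, which is otherwise unused. The prompt is a
# made-up example.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
sample = generator("Write a Python function that reverses a string.", max_new_tokens=64)
print(sample[0]["generated_text"])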
# Commented out IPython magic to ensure Python compatibility.
# %load_ext tensorboard
# %tensorboard --logdir results/runs