|
|
|
"""LLAMA_Fine-Tuning.ipynb |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1C-kNPOgPiCC9ybxVKhOkWB9ts53APbOb |
|
|
|
# Fine-tune Llama 3 in Google Colab |
|
""" |
|
|
|
# Pinned versions; note that the --upgrade call below replaces them with the latest releases.
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

!pip install datasets

!pip install --upgrade accelerate peft bitsandbytes transformers trl
|
|
|
import os

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
|
|
|
|
|
# Pre-quantized 4-bit Llama 3 8B base checkpoint
model_name = "unsloth/llama-3-8b-bnb-4bit"

# Hugging Face access token (redacted)
token_name = "XXXX"

# Name under which the fine-tuned model is saved
new_model = "llama3_python_TFG"
|
|
|
|
|
|
|
|
|
|
|
|
|
# LoRA hyperparameters (wired into the LoraConfig sketch just below)
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1
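# These LoRA hyperparameters are never applied anywhere in the original notebook;
# given the peft imports above, the assumption here is that they were meant for a
# LoraConfig like this one, which is attached to the model before training below.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)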
|
|
|
|
|
|
|
|
|
|
|
|
|
# bitsandbytes 4-bit quantization settings
use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False
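# The 4-bit settings above are likewise unused later on (the checkpoint loaded below
# is already quantized); a minimal sketch of how they would normally feed the
# imported BitsAndBytesConfig when loading a full-precision base model:
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)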
|
|
|
|
|
|
|
|
|
|
|
|
|
# TrainingArguments hyperparameters
output_dir = "./results"
num_train_epochs = 1

# Mixed precision: keep both off on T4-class GPUs; bf16 needs an Ampere GPU or newer
fp16 = False
bf16 = False

per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 2
gradient_checkpointing = True
max_grad_norm = 0.3
learning_rate = 2e-4
weight_decay = 0.001
optim = "paged_adamw_32bit"
lr_scheduler_type = "cosine"
max_steps = -1  # -1 lets num_train_epochs decide when training stops
warmup_ratio = 0.03
group_by_length = True
save_steps = 0
logging_steps = 25

# SFTTrainer settings (see the reference sketch below)
max_seq_length = None
packing = False

# Load the entire model on GPU 0
device_map = {"": 0}
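# For reference: most of the knobs above come from the standard QLoRA recipe built
# around trl's SFTTrainer (imported at the top but never used in this notebook).
# Under the pinned trl==0.4.7 API -- the --upgrade cell may install a newer trl
# whose SFTTrainer signature differs -- the wiring would look roughly like the sketch
# below, where `text_dataset` is a hypothetical dataset with a "text" column and
# `sft_args` a TrainingArguments built from the hyperparameters above:
#
# sft_args = TrainingArguments(
#     output_dir=output_dir,
#     num_train_epochs=num_train_epochs,
#     per_device_train_batch_size=per_device_train_batch_size,
#     gradient_accumulation_steps=gradient_accumulation_steps,
#     gradient_checkpointing=gradient_checkpointing,
#     optim=optim,
#     learning_rate=learning_rate,
#     weight_decay=weight_decay,
#     max_grad_norm=max_grad_norm,
#     max_steps=max_steps,
#     warmup_ratio=warmup_ratio,
#     group_by_length=group_by_length,
#     lr_scheduler_type=lr_scheduler_type,
#     fp16=fp16,
#     bf16=bf16,
#     save_steps=save_steps,
#     logging_steps=logging_steps,
# )
# trainer = SFTTrainer(
#     model=model,
#     train_dataset=text_dataset,
#     peft_config=peft_config,
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     tokenizer=tokenizer,
#     args=sft_args,
#     packing=packing,
# )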
|
|
|
from transformers import Trainer, DataCollatorWithPadding
from datasets import Dataset
|
|
|
def load_text_file(file_path):
    """Return one training example per non-empty line of the file."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]
|
|
|
train_texts = load_text_file('LLAMA_DatosEntrenamiento.txt')
val_texts = load_text_file('LLAMA_DatosValidacion.txt')

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token_name)
# The base Llama 3 tokenizer defines no padding token; fall back to EOS if this
# checkpoint's tokenizer does not provide one, so that padding="longest" below works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
|
|
|
def tokenize_and_encode(texts):
    encodings = tokenizer(texts, truncation=True, padding="longest", max_length=512, return_tensors="pt")
    # Use the input ids as causal-LM labels, masking padding positions so they do
    # not contribute to the loss.
    labels = encodings['input_ids'].clone()
    labels[encodings['attention_mask'] == 0] = -100
    encodings['labels'] = labels
    return encodings
|
|
|
train_encodings = tokenize_and_encode(train_texts)
val_encodings = tokenize_and_encode(val_texts)

train_dataset = Dataset.from_dict({key: val.numpy() for key, val in train_encodings.items()})
val_dataset = Dataset.from_dict({key: val.numpy() for key, val in val_encodings.items()})
|
|
|
training_arguments = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",  # renamed to eval_strategy in newer transformers releases
    eval_steps=500,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    logging_steps=logging_steps,
    save_steps=1000,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    report_to="tensorboard",
    fp16=fp16
)
|
|
|
# The checkpoint ships with a bitsandbytes 4-bit quantization config, so it loads
# quantized; device_map places it on GPU 0.
model = AutoModelForCausalLM.from_pretrained(model_name, token=token_name, device_map=device_map)
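# "unsloth/llama-3-8b-bnb-4bit" is pre-quantized, and transformers refuses to fully
# fine-tune quantized weights, so only LoRA adapters are trained here (assumption:
# LoRA training was the intent, given the peft imports and hyperparameters above).
# prepare_model_for_kbit_training applies the usual k-bit training setup.
from peft import get_peft_model, prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()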
|
|
|
# The sequences were already padded to a common length above, so the collator only
# batches them into tensors.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator
)
|
|
|
trainer.train()

# Save the trained LoRA adapter locally and push it to the Hugging Face Hub
model.save_pretrained(new_model)
model.push_to_hub("eibeel/llama3-python-TFG")
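# Quick smoke test of the fine-tuned model with an arbitrary prompt, reusing the
# tokenizer and model already in memory.
prompt = "Write a Python function that returns the n-th Fibonacci number."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))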
|
|
|
|
|
|
|
|
|
|