# train_llama4.py
from transformers import AutoTokenizer, Llama4ForConditionalGeneration, BitsAndBytesConfig, Trainer, TrainingArguments, DataCollatorForLanguageModeling
import datasets
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import huggingface_hub
import os
print("Running train_llama4.py with CPU offloading (version: 2025-04-22 v1)")
# Authenticate with Hugging Face
LLAMA = os.getenv("LLama")
if not LLAMA:
    raise ValueError("LLama token not found. Set it in the environment as 'LLama'.")
huggingface_hub.login(token=LLAMA)
# Tokenizer
MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Quantization + CPU offload config
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
print("Loading model with 8-bit quantization, CPU offload, and automatic device mapping")
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quant_config,
    offload_folder="./offload"
)
# Resize embeddings if a pad token was added
model.resize_token_embeddings(len(tokenizer))
# Trainer integrates with Accelerate internally, so no explicit Accelerator.prepare() is needed
# Load training data
dataset = datasets.load_dataset('json', data_files="Bingaman_training_data.json")['train']
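# The Trainer below expects tokenized inputs. This sketch assumes each JSON record
# has a "text" field (an assumption; adjust to the actual schema of
# Bingaman_training_data.json).
def tokenize_fn(batch):
    return tokenizer(batch["text"], truncation=True, max_length=1024)

dataset = dataset.map(tokenize_fn, batched=True, remove_columns=dataset.column_names)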
# LoRA setup
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
# Training arguments
training_args = {
    "output_dir": "./results",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 8,
    "optim": "adamw_torch",
    "save_steps": 500,
    "logging_steps": 100,
    "learning_rate": 2e-4,
    "fp16": True,
    "max_grad_norm": 0.3,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine"
}
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=TrainingArguments(**training_args),
    train_dataset=dataset,
    # mlm=False pads batches and copies input_ids into labels for the causal-LM loss
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
)
# Run training
trainer.train()
model.save_pretrained("./fine_tuned_model")
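# Save the tokenizer alongside the adapter weights (it was modified if a pad token was added)
tokenizer.save_pretrained("./fine_tuned_model")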
print("Training completed!")