# wolfsbit-diary-scorer / src/setup_model.py
# Uploaded by ff6347 via huggingface_hub (revision 9ec3d1d, verified)
"""
Step 3: Setting up the model for fine-tuning with LoRA
"""
from pathlib import Path
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
def load_base_model(model_name: str = "Qwen/Qwen2.5-3B-Instruct"):
    """
    Load the base model and tokenizer.

    Uses float16 on Apple MPS when available, otherwise float32 on CPU.

    Args:
        model_name: Hugging Face model id to load.

    Returns:
        (model, tokenizer) tuple, with the model already moved to the
        selected device.
    """
    print(f"Loading model: {model_name}")
    print("(First run will download ~6GB to ~/.cache/huggingface/)")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Ensure tokenizer has a pad token (required for padding/generation)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Check if MPS (Apple Silicon) is available, then load the model once
    # instead of duplicating the from_pretrained call in each branch.
    use_mps = torch.backends.mps.is_available()
    if use_mps:
        print("Using Apple MPS (Metal) backend")
    else:
        print("MPS not available, using CPU (this will be slow)")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # NOTE(review): the `dtype=` kwarg requires a recent transformers
        # release; older versions only accept `torch_dtype=` — confirm the
        # pinned transformers version supports it.
        dtype=torch.float16 if use_mps else torch.float32,
        trust_remote_code=True,
    )
    if use_mps:
        model = model.to("mps")
    return model, tokenizer
def apply_lora(model):
    """
    Wrap the model in LoRA adapters so that only small low-rank update
    matrices are trained instead of the full weight tensors.
    """
    print("\nApplying LoRA configuration...")
    # Adapt every attention projection plus the MLP projections.
    adapted_modules = [
        "q_proj",
        "v_proj",
        "k_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ]
    config = LoraConfig(
        r=16,               # rank of the low-rank update matrices
        lora_alpha=32,      # scaling factor applied to the updates
        target_modules=adapted_modules,
        lora_dropout=0.05,  # dropout on the adapter path (regularization)
        bias="none",
        task_type="CAUSAL_LM",
    )
    peft_model = get_peft_model(model, config)
    peft_model.print_trainable_parameters()
    return peft_model
def setup_for_training(model_name: str = "Qwen/Qwen2.5-3B-Instruct"):
    """
    One-call setup: load the base model/tokenizer and attach LoRA adapters.
    """
    base_model, tokenizer = load_base_model(model_name)
    return apply_lora(base_model), tokenizer
def test_inference(model, tokenizer, prompt: str):
    """
    Quick greedy-decoding test to verify the model works.

    Args:
        model: A (possibly PEFT-wrapped) causal LM supporting ``generate``.
        tokenizer: The matching tokenizer.
        prompt: Text prompt to complete.

    Returns:
        The newly generated text with the prompt removed.
    """
    print(f"\nTest prompt: {prompt[:50]}...")
    device = next(model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode only the tokens generated after the prompt. Slicing the decoded
    # string with len(prompt) is brittle: detokenization does not always
    # round-trip the prompt text exactly, so cut at the token level instead.
    prompt_len = inputs["input_ids"].shape[1]
    new_text = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()
    print(f"Model output: {new_text}")
    return new_text
# Run this script directly to test the setup
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("Step 3: Model Setup Test")
    print(banner)
    # Report whether the Apple MPS backend can be used
    print("\n[Environment Check]")
    print(f" MPS Available: {torch.backends.mps.is_available()}")
    print(f" MPS Built: {torch.backends.mps.is_built()}")
    print(f" PyTorch version: {torch.__version__}")
    # Load the base model and attach the LoRA adapters
    print("\n[Loading Model]")
    model, tokenizer = setup_for_training()
    print("\n[Status]")
    print(" ✓ Model loaded successfully")
    print(" ✓ LoRA adapters applied")
    print(f" Device: {next(model.parameters()).device}")
    # Smoke-test generation with a trivial prompt
    print("\n[Quick Inference Test]")
    test_prompt = "What is 2 + 2? Answer with just the number:"
    test_inference(model, tokenizer, test_prompt)
    print("\n" + banner)
    print("✓ Setup complete! Ready for training.")
    print(banner)
    # Remind the user where the downloaded weights live
    print("\n[Cache Location]")
    print(" Model cached at: ~/.cache/huggingface/hub/")
    print(" (This is reused for future runs)")