| | """ |
| | Step 3: Setting up the model for fine-tuning with LoRA |
| | """ |
| |
|
| | from pathlib import Path |
| |
|
| | import torch |
| | from peft import LoraConfig, get_peft_model |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| |
|
| |
|
def load_base_model(model_name: str = "Qwen/Qwen2.5-3B-Instruct"):
    """Load the base causal LM and its tokenizer, preferring Apple MPS.

    Args:
        model_name: Hugging Face hub identifier of the checkpoint to load.

    Returns:
        Tuple of ``(model, tokenizer)``. The model is placed on the "mps"
        device in float16 when Metal is available, otherwise left on CPU
        in float32.
    """
    print(f"Loading model: {model_name}")
    print("(First run will download ~6GB to ~/.cache/huggingface/)")

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Some checkpoints ship without a pad token; reuse EOS so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    use_mps = torch.backends.mps.is_available()
    if use_mps:
        print("Using Apple MPS (Metal) backend")
    else:
        print("MPS not available, using CPU (this will be slow)")

    # float16 halves memory on MPS; CPU stays in full precision.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        dtype=torch.float16 if use_mps else torch.float32,
        trust_remote_code=True,
    )
    if use_mps:
        model = model.to("mps")

    return model, tokenizer
| |
|
| |
|
def apply_lora(model):
    """Attach LoRA adapters to *model* for parameter-efficient fine-tuning.

    Uses rank-16 adapters (alpha=32, dropout=0.05) on every attention and
    MLP projection matrix, prints the trainable-parameter summary, and
    returns the wrapped PEFT model.
    """
    print("\nApplying LoRA configuration...")

    # Cover the full set of linear projections: attention first, then MLP.
    attention_targets = ["q_proj", "v_proj", "k_proj", "o_proj"]
    mlp_targets = ["gate_proj", "up_proj", "down_proj"]

    config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=attention_targets + mlp_targets,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    peft_model = get_peft_model(model, config)
    peft_model.print_trainable_parameters()

    return peft_model
| |
|
| |
|
def setup_for_training(model_name: str = "Qwen/Qwen2.5-3B-Instruct"):
    """Convenience wrapper: load the base model, then attach LoRA adapters.

    Args:
        model_name: Hugging Face hub identifier of the checkpoint to load.

    Returns:
        Tuple of ``(peft_model, tokenizer)`` ready for fine-tuning.
    """
    base_model, tokenizer = load_base_model(model_name)
    return apply_lora(base_model), tokenizer
| |
|
| |
|
def test_inference(model, tokenizer, prompt: str):
    """Run a short greedy generation to verify the model produces output.

    Args:
        model: Causal LM (base or PEFT-wrapped) to generate with.
        tokenizer: Tokenizer matching *model*.
        prompt: Text prompt to complete.

    Returns:
        The newly generated text with the prompt removed, stripped of
        surrounding whitespace.
    """
    print(f"\nTest prompt: {prompt[:50]}...")

    # Send inputs to whichever device the model parameters live on.
    device = next(model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated token slice. Slicing the decoded
    # string by len(prompt) is fragile: detokenization does not always
    # reproduce the prompt byte-for-byte (whitespace normalization,
    # special-token handling), which can truncate real output or leak
    # prompt text into the result.
    new_text = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()
    print(f"Model output: {new_text}")
    return new_text
| |
|
| |
|
| | |
if __name__ == "__main__":
    print("=" * 60)
    print("Step 3: Model Setup Test")
    print("=" * 60)

    # Report backend availability before the (slow) model load so a CPU
    # fallback is visible immediately. Placeholder-free strings are plain
    # literals — the original's f-prefixes did nothing (lint F541).
    print("\n[Environment Check]")
    print(f"  MPS Available: {torch.backends.mps.is_available()}")
    print(f"  MPS Built: {torch.backends.mps.is_built()}")
    print(f"  PyTorch version: {torch.__version__}")

    print("\n[Loading Model]")
    model, tokenizer = setup_for_training()

    print("\n[Status]")
    print("  ✓ Model loaded successfully")
    print("  ✓ LoRA adapters applied")
    print(f"  Device: {next(model.parameters()).device}")

    # Smoke-test generation end to end before committing to training.
    print("\n[Quick Inference Test]")
    test_prompt = "What is 2 + 2? Answer with just the number:"
    test_inference(model, tokenizer, test_prompt)

    print("\n" + "=" * 60)
    print("✓ Setup complete! Ready for training.")
    print("=" * 60)

    print("\n[Cache Location]")
    print("  Model cached at: ~/.cache/huggingface/hub/")
    print("  (This is reused for future runs)")
|