| | """ |
| | Step 3: Setting up the model for fine-tuning with LoRA |
| | """ |
| |
|
| | from pathlib import Path |
| |
|
| | import torch |
| | from peft import LoraConfig, get_peft_model |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| |
|
| |
|
def load_base_model(model_name: str = "Qwen/Qwen2.5-3B-Instruct"):
    """Load the base causal LM and its tokenizer, preferring Apple MPS.

    Args:
        model_name: Hugging Face hub identifier of the checkpoint to load.

    Returns:
        Tuple of ``(model, tokenizer)``. The model is placed on the "mps"
        device in float16 when Metal is available, otherwise left on CPU
        in float32.
    """
    print(f"Loading model: {model_name}")
    print("(First run will download ~6GB to ~/.cache/huggingface/)")

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Some checkpoints ship without a pad token; reuse EOS so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    use_mps = torch.backends.mps.is_available()
    if use_mps:
        print("Using Apple MPS (Metal) backend")
    else:
        print("MPS not available, using CPU (this will be slow)")

    # float16 halves memory on MPS; CPU stays in full precision.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        dtype=torch.float16 if use_mps else torch.float32,
        trust_remote_code=True,
    )
    if use_mps:
        model = model.to("mps")

    return model, tokenizer
| |
|
| |
|
def apply_lora(model):
    """Attach LoRA adapters to *model* for parameter-efficient fine-tuning.

    Uses rank-16 adapters (alpha=32, dropout=0.05) on every attention and
    MLP projection matrix, prints the trainable-parameter summary, and
    returns the wrapped PEFT model.
    """
    print("\nApplying LoRA configuration...")

    # Cover the full set of linear projections: attention first, then MLP.
    attention_targets = ["q_proj", "v_proj", "k_proj", "o_proj"]
    mlp_targets = ["gate_proj", "up_proj", "down_proj"]

    config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=attention_targets + mlp_targets,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    peft_model = get_peft_model(model, config)
    peft_model.print_trainable_parameters()

    return peft_model
| |
|
| |
|
def setup_for_training(model_name: str = "Qwen/Qwen2.5-3B-Instruct"):
    """Convenience wrapper: load the base model, then attach LoRA adapters.

    Args:
        model_name: Hugging Face hub identifier of the checkpoint to load.

    Returns:
        Tuple of ``(peft_model, tokenizer)`` ready for fine-tuning.
    """
    base_model, tokenizer = load_base_model(model_name)
    return apply_lora(base_model), tokenizer
| |
|
| |
|
def test_inference(model, tokenizer, prompt: str):
    """Run a short greedy generation to verify the model produces output.

    Args:
        model: Causal LM (base or PEFT-wrapped) to generate with.
        tokenizer: Tokenizer matching *model*.
        prompt: Text prompt to complete.

    Returns:
        The newly generated text with the prompt removed, stripped of
        surrounding whitespace.
    """
    print(f"\nTest prompt: {prompt[:50]}...")

    # Send inputs to whichever device the model parameters live on.
    device = next(model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated token slice. Slicing the decoded
    # string by len(prompt) is fragile: detokenization does not always
    # reproduce the prompt byte-for-byte (whitespace normalization,
    # special-token handling), which can truncate real output or leak
    # prompt text into the result.
    new_text = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()
    print(f"Model output: {new_text}")
    return new_text
| |
|
| |
|
| | |
if __name__ == "__main__":
    print("=" * 60)
    print("Step 3: Model Setup Test")
    print("=" * 60)

    # Report backend availability before the (slow) model load so a CPU
    # fallback is visible immediately. Placeholder-free strings are plain
    # literals — the original's f-prefixes did nothing (lint F541).
    print("\n[Environment Check]")
    print(f"  MPS Available: {torch.backends.mps.is_available()}")
    print(f"  MPS Built: {torch.backends.mps.is_built()}")
    print(f"  PyTorch version: {torch.__version__}")

    print("\n[Loading Model]")
    model, tokenizer = setup_for_training()

    print("\n[Status]")
    print("  ✓ Model loaded successfully")
    print("  ✓ LoRA adapters applied")
    print(f"  Device: {next(model.parameters()).device}")

    # Smoke-test generation end to end before committing to training.
    print("\n[Quick Inference Test]")
    test_prompt = "What is 2 + 2? Answer with just the number:"
    test_inference(model, tokenizer, test_prompt)

    print("\n" + "=" * 60)
    print("✓ Setup complete! Ready for training.")
    print("=" * 60)

    print("\n[Cache Location]")
    print("  Model cached at: ~/.cache/huggingface/hub/")
    print("  (This is reused for future runs)")
|