""" Utility functions for handling Gemma models """ import os import torch from transformers import AutoModelForCausalLM, AutoTokenizer from huggingface_hub import login, HfApi def get_available_models(): """ Returns a list of available Gemma models for fine-tuning. """ return [ "google/gemma-2-2b-it", "google/gemma-2-9b-it", "google/gemma-2-27b-it" ] def load_model(model_name, token=None): """ Loads a model from Hugging Face Hub. Args: model_name: Name of the model to load token: Hugging Face token for access to gated models Returns: Tuple of (model, tokenizer) """ if token: login(token) # Set appropriate device if torch.cuda.is_available(): device = "cuda" elif torch.backends.mps.is_available(): device = "mps" # For Apple Silicon else: device = "cpu" print(f"Loading model {model_name} on {device}...") # Load model with appropriate parameters based on device and model size model_size = model_name.split("-")[2] if device == "cuda": # For CUDA devices, optimize based on model size and available memory if model_size in ["2b", "7b"]: # Smaller models can be loaded in BF16 model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.bfloat16, device_map="auto" ) else: # Larger models may need additional optimizations model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.bfloat16, device_map="auto", load_in_8bit=True ) elif device == "cpu": # For CPU, use FP32 but load 8-bit for larger models to conserve memory if model_size in ["2b"]: model = AutoModelForCausalLM.from_pretrained( model_name, device_map={"": device} ) else: model = AutoModelForCausalLM.from_pretrained( model_name, device_map={"": device}, load_in_8bit=True ) else: # MPS (Apple Silicon) model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=torch.float16, device_map={"": device} ) # Load tokenizer tokenizer = AutoTokenizer.from_pretrained(model_name) return model, tokenizer def export_model(model_path, output_dir, model_name, format="pytorch", quantization=None): """ Exports a fine-tuned model to the specified format. 

    Args:
        model_path: Path to the fine-tuned model
        output_dir: Directory to save the exported model
        model_name: Name for the exported model
        format: Export format ("pytorch", "gguf", or "safetensors")
        quantization: Quantization level for GGUF format

    Returns:
        Dictionary with export information
    """
    if not os.path.exists(model_path):
        raise ValueError(f"Model path '{model_path}' does not exist")

    os.makedirs(output_dir, exist_ok=True)
    export_path = os.path.join(output_dir, model_name)
    os.makedirs(export_path, exist_ok=True)

    # Load the model (assumes any LoRA weights have already been merged into
    # the checkpoint at model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Handle the different export formats
    if format.lower() == "pytorch":
        # Export as a PyTorch model
        model.save_pretrained(export_path)
        tokenizer.save_pretrained(export_path)
    elif format.lower() == "safetensors":
        # Export as safetensors
        model.save_pretrained(export_path, safe_serialization=True)
        tokenizer.save_pretrained(export_path)
    elif format.lower() == "gguf":
        # GGUF export normally requires a separate conversion script.
        # This is simplified; in practice, use llama.cpp or similar tools.
        if quantization is not None and quantization.lower() != "none":
            # Command for quantized GGUF conversion would go here
            pass
        else:
            # Command for standard GGUF conversion would go here
            pass
    else:
        raise ValueError(f"Unsupported export format: {format}")

    # Calculate the model size
    model_size_bytes = sum(p.numel() * p.element_size() for p in model.parameters())
    model_size_gb = model_size_bytes / (1024**3)

    return {
        "format": format.lower(),
        "quantization": quantization if format.lower() == "gguf" else "None",
        "model_name": model_name,
        "export_path": export_path,
        "model_size": f"{model_size_gb:.2f} GB",
    }


def push_to_hub(model_path, repo_name, token):
    """Pushes a fine-tuned model to Hugging Face Hub.

    Args:
        model_path: Path to the fine-tuned model
        repo_name: Name for the repository on Hugging Face Hub
        token: Hugging Face token

    Returns:
        URL of the uploaded model
    """
    if not os.path.exists(model_path):
        raise ValueError(f"Model path '{model_path}' does not exist")

    login(token)

    # Load the model (assumes any LoRA weights have already been merged into
    # the checkpoint at model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Push to hub
    model.push_to_hub(repo_name)
    tokenizer.push_to_hub(repo_name)

    # Build the model URL
    model_url = f"https://huggingface.co/{repo_name}"

    return model_url
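

# Example usage sketch: the checkpoint directory "./outputs/gemma-2-2b-finetuned"
# and the export settings below are illustrative assumptions only; adjust the
# paths and format to your own setup before running.
if __name__ == "__main__":
    # List the Gemma variants this module knows about
    for name in get_available_models():
        print(name)

    # Export a hypothetical fine-tuned checkpoint to safetensors
    info = export_model(
        model_path="./outputs/gemma-2-2b-finetuned",  # assumed local checkpoint
        output_dir="./exports",
        model_name="gemma-2-2b-finetuned",
        format="safetensors",
    )
    print(info)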