#!/usr/bin/env python3 """ Script to download a HuggingFace model and convert it to GGUF format for use with llama.cpp """ import argparse import subprocess import sys import shutil from pathlib import Path def install_packages(): """Ensure required packages are installed.""" try: print("Installing required packages…") subprocess.run( [sys.executable, "-m", "pip", "install", "huggingface_hub", "--upgrade"], check=True ) subprocess.run( [sys.executable, "-m", "pip", "install", "sentencepiece"], check=True ) subprocess.run( [sys.executable, "-m", "pip", "install", "--no-cache-dir", "--force-reinstall", "llama-cpp-python[server]"], check=True ) print("✅ Required packages installed successfully") except subprocess.CalledProcessError as e: print(f"❌ Failed to install required packages: {e}") sys.exit(1) def download_model(repo_id: str, temp_dir: Path) -> str: """Download the model snapshot into temp_dir.""" print(f"📥 Downloading model {repo_id}…") if temp_dir.exists(): shutil.rmtree(temp_dir) temp_dir.mkdir(parents=True, exist_ok=True) temp_str = str(temp_dir.resolve()) try: subprocess.run([ sys.executable, "-c", f"from huggingface_hub import snapshot_download; " f"snapshot_download(repo_id='{repo_id}', local_dir=r'{temp_str}')" ], check=True) print("✅ Model downloaded successfully") except subprocess.CalledProcessError as e: print(f"❌ Failed to download model: {e}") sys.exit(1) return temp_str def convert_to_gguf(temp_str: str, output_file: Path, quantize: str): """Convert the downloaded model to GGUF using llama.cpp's converter script.""" print(f"🔄 Converting to GGUF format with {quantize} quantization…") # Clone llama.cpp if missing repo_dir = Path("llama.cpp") if not repo_dir.exists(): print("📥 Cloning llama.cpp for conversion script…") subprocess.run( ["git", "clone", "https://github.com/ggerganov/llama.cpp.git", str(repo_dir)], check=True ) # Locate converter script (top-level or fallback) converter = repo_dir / "convert_hf_to_gguf.py" if not converter.exists(): converter = repo_dir / "convert.py" if not converter.exists(): print(f"❌ Converter script not found in {repo_dir}") sys.exit(1) # Map user quantize to supported outtype valid_outtypes = {"f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"} scheme = quantize.lower() if scheme not in valid_outtypes: print(f"⚠️ Quantization scheme '{quantize}' not supported; defaulting to 'auto'.") scheme = "auto" cmd = [ sys.executable, str(converter), temp_str, "--outfile", str(output_file), "--outtype", scheme ] print("Running conversion:", " ".join(cmd)) try: subprocess.run(cmd, check=True) print(f"✅ Model converted and saved to {output_file}") except subprocess.CalledProcessError as e: print(f"❌ Conversion failed: {e}") sys.exit(1) def main(): parser = argparse.ArgumentParser(description="Download & convert a HuggingFace model to GGUF for llama.cpp") parser.add_argument("--model", type=str, default="microsoft/Phi-3-mini-4k-instruct", help="HuggingFace model ID to download") parser.add_argument("--quantize", type=str, default="Q4_K_M", help="Quantization type (e.g., Q4_K_M, Q5_K_M, Q8_0)") parser.add_argument("--output_dir", type=str, default="models", help="Directory to save converted GGUF file") args = parser.parse_args() # Prepare output directory output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) model_name = args.model.split("/")[-1] output_file = output_dir / f"{model_name}.{args.quantize}.gguf" if output_file.exists(): print(f"⚠️ {output_file} already exists, skipping conversion.") return # Run steps install_packages() temp_dir = Path.cwd() / "tmp_model" temp_str = download_model(args.model, temp_dir) convert_to_gguf(temp_str, output_file, args.quantize) # Cleanup print("🧹 Cleaning up temporary files…") shutil.rmtree(temp_dir, ignore_errors=True) print(f"🎉 All done! Your GGUF model is ready at {output_file}") if __name__ == "__main__": main()