import sys
from pathlib import Path
from subprocess import run
from typing import Generator

BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"


def _require_file(path: Path, step: str) -> None:
    """Raise ``FileNotFoundError`` if *step* did not leave a file at *path*."""
    if not path.is_file():
        raise FileNotFoundError(
            f"{step} did not produce the expected file: {path}"
        )


def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
    """Convert a model to a ggml file, optionally quantizing it afterwards.

    This is a generator: it yields human-readable progress messages while it
    works and *returns* the final model path (available as
    ``StopIteration.value`` or through ``yield from``).

    Args:
        cache_folder: Directory passed to the conversion script as output
            location; the resulting ``.bin`` files are looked up here.
        model_id: Model identifier handed to the conversion script. Only the
            part after the last ``/`` is used to build output file names.
        precision: ``"FP32"`` adds ``--use-fp32`` to the conversion command
            and selects the ``f32`` file suffix; any other value selects
            ``f16``.
        quantization: When true, run the ``quantize`` binary on the converted
            file and delete the non-quantized file afterwards.

    Yields:
        Progress messages (the commands being run and success notices).

    Returns:
        Path of the final model file (quantized if ``quantization`` is set).

    Raises:
        subprocess.CalledProcessError: If a subprocess exits non-zero.
        FileNotFoundError: If a step finishes without producing its expected
            output file.
    """
    wants_fp32 = precision == "FP32"

    # Conversion.
    # Use the running interpreter rather than whatever "python" resolves to on
    # PATH, so the script runs in the same environment as this module.
    cmd = [
        sys.executable,
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if wants_fp32:
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist.
    f_suffix = "f32" if wants_fp32 else "f16"
    # Take the last path component so ids without an organisation prefix
    # (no "/") do not blow up after the conversion has already run.
    # NOTE(review): assumes the conversion script names its output after the
    # last "/"-separated component of the id -- confirm against the script.
    model_name = model_id.rsplit("/", 1)[-1]
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    _require_file(model_path, "Conversion")
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization.
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        # Resolve the binary through BLOOMZ_FOLDER (like the conversion
        # script) so the call does not depend on the current directory.
        cmd = [
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            # NOTE(review): "2" presumably selects the q4_0 format used in the
            # output file name -- confirm against the quantize tool's usage.
            "2",
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        _require_file(q_model_path, "Quantization")

        # Delete non-quantized file.
        model_path.unlink(missing_ok=True)
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    return model_path