import sys
from pathlib import Path
from subprocess import run

BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"


def _require_file(path: Path, producer: str) -> None:
    """Raise ``AssertionError`` if *path* is not an existing regular file.

    An explicit raise is used instead of an ``assert`` statement so the check
    is not stripped when Python runs with ``-O``; the exception type is kept
    as ``AssertionError`` so existing callers observe the same failure type.
    """
    if not path.is_file():
        raise AssertionError(f"{producer} did not produce expected file: {path}")


def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Path:
    """Convert a model to a ggml file and optionally quantize it.

    Runs ``convert-hf-to-ggml.py`` from ``BLOOMZ_FOLDER`` on *model_id*,
    writing into *cache_folder*, then optionally runs the ``quantize``
    binary from the same folder on the converted file.

    Args:
        cache_folder: Directory passed to the converter as its output
            location; the resulting model files are expected there.
        model_id: Model identifier passed verbatim to the converter
            (e.g. ``"org/name"``). Only its last ``/``-separated component
            is used to build the expected file names.
        precision: ``"FP32"`` adds ``--use-fp32`` to the converter call and
            selects the ``f32`` file suffix; any other value selects ``f16``.
        quantization: When true, additionally produce a ``-q4_0`` file.

    Returns:
        Path of the quantized model if *quantization* is true, otherwise the
        path of the converted model.

    Raises:
        subprocess.CalledProcessError: If the converter or the quantizer
            exits with a non-zero status.
        AssertionError: If an expected output file is missing afterwards.
    """
    # Conversion
    use_fp32 = precision == "FP32"
    cmd = [
        # Use the running interpreter rather than whatever "python" resolves
        # to on PATH, so the converter sees the same environment/packages.
        sys.executable,
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if use_fp32:
        cmd.append("--use-fp32")
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if use_fp32 else "f16"
    # Take the last path component so ids without a namespace ("name") or
    # with several separators do not blow up after the conversion already ran.
    # NOTE(review): assumes the converter names its output after the last
    # "/"-separated component of the id -- confirm against the script.
    model_name = model_id.rsplit("/", 1)[-1]
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    _require_file(model_path, "conversion")

    if not quantization:
        return model_path

    # Quantization
    # Keep the input (model_path) and output (q_model_path) distinct: the
    # quantizer reads the converted file and writes a new one.
    q_model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
    cmd = [
        # Resolve the binary relative to this module, like the converter
        # script, instead of relative to the current working directory.
        str(BLOOMZ_FOLDER / "quantize"),
        str(model_path),
        str(q_model_path),
        # Quantization type id; presumably selects q4_0 given the output
        # file name -- verify against the quantize tool's usage.
        "2",
    ]
    run(cmd, check=True)
    _require_file(q_model_path, "quantization")

    # Return
    return q_model_path