bloomz.cpp-converter / convert.py
testbot
Requested changes
8ec55c7
raw
history blame
1.38 kB
import sys
from pathlib import Path
from subprocess import run
from typing import Generator
# Directory named "bloomz.cpp" next to this file; the conversion script and the
# `quantize` executable are both looked up inside it.
BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"


def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
    """Convert a model to a ggml file and optionally quantize it.

    This is a generator: it yields human-readable progress messages while it
    works and *returns* the path of the final model file (available as
    ``StopIteration.value`` or through ``yield from``).

    Args:
        cache_folder: Directory handed to ``convert-hf-to-ggml.py`` as its
            output location; the resulting ``.bin`` files are expected there.
        model_id: Model identifier forwarded to the conversion script
            (presumably a Hugging Face Hub id such as ``"org/name"`` -- confirm
            against the script). The part after the last ``/`` is used to
            build the expected output file name.
        precision: ``"FP32"`` adds ``--use-fp32`` and expects an ``f32`` file;
            any other value expects an ``f16`` file.
        quantization: When true, additionally run the ``quantize`` executable
            on the converted file.

    Yields:
        Progress messages describing each command and each completed step.

    Returns:
        Path of the quantized file when ``quantization`` is true, otherwise
        the path of the converted file.

    Raises:
        subprocess.CalledProcessError: If either external command exits with
            a non-zero status.
        AssertionError: If an expected output file is missing after its
            command finished.
    """
    # Conversion
    cmd = [
        # Use the interpreter running this code so the script sees the same
        # environment; fall back to "python" if it cannot be determined.
        sys.executable or "python",
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if precision == "FP32":
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if precision == "FP32" else "f16"
    # Keep only the last path component so ids without an organisation prefix
    # do not blow up on tuple unpacking after the conversion already ran.
    model_name = model_id.rsplit("/", 1)[-1]
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    if not model_path.is_file():
        # Explicit raise (rather than `assert`) so the check survives
        # `python -O`; AssertionError is kept for backward compatibility.
        raise AssertionError(f"Converted model file not found: {model_path}")
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        cmd = [
            # Resolve the binary through BLOOMZ_FOLDER, like the conversion
            # script, so the call does not depend on the working directory.
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            # Quantization type argument; presumably selects q4_0 to match
            # the output file name -- confirm against the quantize tool.
            "2",
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        if not q_model_path.is_file():
            raise AssertionError(
                f"Quantized model file not found: {q_model_path}"
            )
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return
    return model_path