Spaces:
Runtime error
Runtime error
File size: 1,378 Bytes
1fc2558 8ec55c7 1fc2558 713b2b5 1fc2558 713b2b5 8ec55c7 1fc2558 713b2b5 1fc2558 8ec55c7 1fc2558 713b2b5 1fc2558 8ec55c7 1fc2558 85aa03c 713b2b5 1fc2558 713b2b5 1fc2558 8ec55c7 1fc2558 713b2b5 8ec55c7 1fc2558 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import sys
from pathlib import Path
from subprocess import run
from typing import Generator
# Directory named "bloomz.cpp" located next to this module; convert() looks up
# the ggml conversion script inside it.
BLOOMZ_FOLDER = Path(__file__).parent.joinpath("bloomz.cpp")
def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
    """Convert a model to ggml (optionally quantized), yielding progress text.

    This is a generator: each yielded string is a human-readable status
    message. The path of the final model file is the generator's *return*
    value, available through ``StopIteration.value`` or ``yield from``.

    Args:
        cache_folder: Directory passed to the conversion script as its output
            location; the resulting ``.bin`` files are expected there.
        model_id: Model identifier handed to the conversion script. Only the
            part after the last ``/`` is used to build the output file name.
        precision: ``"FP32"`` adds ``--use-fp32`` and selects the ``f32``
            file suffix; any other value selects ``f16``.
        quantization: When true, additionally run the ``quantize`` binary on
            the converted model.

    Yields:
        Status messages describing the commands being run and their outcome.

    Returns:
        Path to the quantized model if ``quantization`` is true, otherwise
        the path to the converted model.

    Raises:
        subprocess.CalledProcessError: If a command exits with a non-zero
            status (both are run with ``check=True``).
        AssertionError: If an expected output file is missing after its
            command completed.
    """
    use_fp32 = precision == "FP32"

    # Conversion
    cmd = [
        # Use the running interpreter rather than whatever "python" happens
        # to resolve to on PATH.
        sys.executable,
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if use_fp32:
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if use_fp32 else "f16"
    # Take the last path component so ids without exactly one "/" do not
    # crash on tuple unpacking.
    # NOTE(review): assumes the conversion script names its output after the
    # last "/"-separated component of the model id — confirm against the script.
    model_name = model_id.split("/")[-1]
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    # Explicit raise (instead of a bare `assert`) so the check survives `-O`
    # while keeping the same exception type.
    if not model_path.is_file():
        raise AssertionError(f"Converted model not found: {model_path}")
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        cmd = [
            # Resolve the binary next to this module, like the conversion
            # script, instead of relative to the current working directory.
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            # NOTE(review): presumably the quantization type id matching the
            # "q4_0" file suffix — confirm against the quantize tool.
            "2",
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        if not q_model_path.is_file():
            raise AssertionError(f"Quantized model not found: {q_model_path}")
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return
    return model_path
|