from pathlib import Path
from subprocess import run
from typing import Generator

BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"


def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
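    """Convert a Hugging Face model to ggml and optionally quantize it.

    Yields progress messages while the conversion commands run and returns
    the path to the final model file (the q4_0 file when quantization is
    enabled).
    """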
    # Conversion
    cmd = [
        "python",
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if precision == "FP32":
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if precision == "FP32" else "f16"
    _, model_name = model_id.split("/")
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    assert model_path.is_file()
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        # Use BLOOMZ_FOLDER so the quantize binary is found regardless of cwd
        cmd = [
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            "2",  # quantization type matching the q4_0 output file name
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        assert q_model_path.is_file()

        # Delete non-quantized file
        model_path.unlink(missing_ok=True)
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return the final path; callers retrieve it from StopIteration.value
    return model_path
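

# A minimal usage sketch (assumed cache directory and model id, for
# illustration only): drive the generator to stream progress messages, then
# read the returned model path from StopIteration.value, which is how a
# generator's return value is delivered.
def _example_usage() -> None:
    gen = convert(
        cache_folder=Path("/tmp/ggml-cache"),  # assumed scratch directory
        model_id="bigscience/bloomz-560m",  # example Hugging Face model id
        precision="FP16",
        quantization=True,
    )
    while True:
        try:
            print(next(gen))  # progress message yielded by convert()
        except StopIteration as stop:
            print(f"Final model file: {stop.value}")  # Path returned by convert()
            break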