from pathlib import Path
from subprocess import run
from typing import Generator

BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"


def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
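    """Convert a Hugging Face model to ggml and optionally quantize it.

    Yields progress messages while the conversion commands run and returns
    the path to the final model file (the q4_0 file when quantization is
    enabled).
    """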
    # Conversion
    cmd = [
        "python",
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if precision == "FP32":
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if precision == "FP32" else "f16"
    _, model_name = model_id.split("/")
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    assert model_path.is_file()
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        # Use BLOOMZ_FOLDER so the quantize binary is found regardless of cwd
        cmd = [
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            "2",  # quantization type matching the q4_0 output file name
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        assert q_model_path.is_file()

        # Delete non-quantized file
        model_path.unlink(missing_ok=True)
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return the final path; callers retrieve it from StopIteration.value
    return model_path
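

# A minimal usage sketch (assumed cache directory and model id, for
# illustration only): drive the generator to stream progress messages, then
# read the returned model path from StopIteration.value, which is how a
# generator's return value is delivered.
def _example_usage() -> None:
    gen = convert(
        cache_folder=Path("/tmp/ggml-cache"),  # assumed scratch directory
        model_id="bigscience/bloomz-560m",  # example Hugging Face model id
        precision="FP16",
        quantization=True,
    )
    while True:
        try:
            print(next(gen))  # progress message yielded by convert()
        except StopIteration as stop:
            print(f"Final model file: {stop.value}")  # Path returned by convert()
            break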