from pathlib import Path
from subprocess import run
from typing import Generator

# The "bloomz.cpp" directory that sits alongside this module.
BLOOMZ_FOLDER = Path(__file__).parent.joinpath("bloomz.cpp")


def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
    """Convert a model to ggml format and optionally quantize it.

    This is a generator: it yields human-readable progress messages while it
    works, and the path of the final model file is the generator's *return*
    value (available as ``StopIteration.value`` or via ``yield from``).

    Args:
        cache_folder: Directory passed to the conversion script as its second
            argument; the produced ``.bin`` files are looked up there.
        model_id: Model identifier passed to the conversion script. Only its
            last ``/``-separated component is used to build file names.
        precision: ``"FP32"`` appends ``--use-fp32`` to the conversion command
            and expects an ``f32`` file; any other value expects ``f16``.
        quantization: When true, run the ``quantize`` binary on the converted
            model and delete the non-quantized file afterwards.

    Yields:
        Progress messages describing each command and each completed step.

    Returns:
        Path to the quantized model when ``quantization`` is true, otherwise
        the path to the converted model.

    Raises:
        subprocess.CalledProcessError: If a subprocess exits with a non-zero
            status.
        AssertionError: If an expected output file does not exist after its
            command has completed.
    """
    # Local import so this block does not depend on a file-level `import sys`.
    import sys

    f_suffix = "f32" if precision == "FP32" else "f16"
    # Use the last path component so ids without exactly one "/" do not raise
    # ValueError (previously that happened only after the conversion had run).
    model_name = model_id.rsplit("/", 1)[-1]

    # Conversion
    cmd = [
        # Prefer the running interpreter over whatever "python" is on PATH.
        sys.executable or "python",
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if precision == "FP32":
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist. Raised explicitly (rather than via `assert`) so
    # the check still runs under `python -O`; the exception type is unchanged.
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    if not model_path.is_file():
        raise AssertionError(f"Converted model file not found: {model_path}")
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        cmd = [
            # Resolve the binary relative to this module, like the conversion
            # script above, instead of relative to the working directory.
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            "2",
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        if not q_model_path.is_file():
            raise AssertionError(
                f"Quantized model file not found: {q_model_path}"
            )

        # Delete non-quantized file
        model_path.unlink(missing_ok=True)
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return
    return model_path