import sys
from pathlib import Path
from subprocess import run


def convert(model_id: str, precision: str, quantization: bool) -> Path:
    """Convert a Hugging Face model to a ggml file, optionally quantizing it.

    Runs ``./bloomz.cpp/convert-hf-to-ggml.py`` on ``model_id`` and, when
    ``quantization`` is true, runs ``./bloomz.cpp/quantize`` on the converted
    file. All paths are relative to the current working directory.

    Args:
        model_id: Model identifier, e.g. ``"bigscience/bloomz-560m"``. Only
            the part after the last ``/`` is used to build file names.
        precision: ``"FP32"`` passes ``--use-fp32`` to the converter and
            selects the ``f32`` file suffix; any other value selects ``f16``.
        quantization: Whether to run the quantize binary after conversion.

    Returns:
        Path of the quantized model file when ``quantization`` is true,
        otherwise the path of the converted model file.

    Raises:
        subprocess.CalledProcessError: If either external command exits with
            a non-zero status.
        FileNotFoundError: If an expected output file is missing after its
            command has finished.
    """
    models_dir = "./bloomz.cpp/models"

    # Conversion. Use the interpreter running this code so the converter sees
    # the same environment; fall back to "python" if it is unknown.
    cmd = [
        sys.executable or "python",
        "./bloomz.cpp/convert-hf-to-ggml.py",
        model_id,  # e.g. bigscience/bloomz-560m
        models_dir,
    ]
    if precision == "FP32":
        cmd.append("--use-fp32")
    run(cmd, check=True)

    # Model file should exist.
    # NOTE(review): the file name pattern is assumed to match what the
    # converter script writes; the existence check below guards that.
    f_suffix = "f32" if precision == "FP32" else "f16"
    model_name = model_id.split("/")[-1]
    converted_file = f"{models_dir}/ggml-model-{model_name}-{f_suffix}.bin"
    model_path = Path(converted_file)
    if not model_path.is_file():
        raise FileNotFoundError(
            f"Conversion did not produce the expected file: {model_path}"
        )

    # Quantization
    if quantization:
        quantized_file = (
            f"{models_dir}/ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        cmd = [
            "./bloomz.cpp/quantize",
            converted_file,
            quantized_file,
            "2",  # presumably selects q4_0, matching the output name; confirm
        ]
        run(cmd, check=True)
        # Return the quantized artifact, not the unquantized input.
        model_path = Path(quantized_file)
        if not model_path.is_file():
            raise FileNotFoundError(
                f"Quantization did not produce the expected file: {model_path}"
            )

    return model_path