import argparse
import os
import shutil
import subprocess
import time
from pathlib import Path

from git import Repo


def clone_hf_with_git(username: str, model_name: str, saved_dir: str):
    full_model_name = f"{username}/{model_name}"
    url = f"https://huggingface.co/{full_model_name}"
    saved = f"{saved_dir}/{model_name}"

    # perform `git lfs install`
    subprocess.run(["git", "lfs", "install"])

    print(f"[INFO] Cloning {model_name} from {url} ...")
    Repo.clone_from(url, saved)


def download_hf_with_git(full_name: str, saved_dir: str):
    model_name = full_name.split("/")[1]
    url = f"git@hf.co:{full_name}"
    saved = f"{saved_dir}/{model_name}"

    # perform `git lfs install`
    subprocess.run(["git", "lfs", "install"])

    print(f"Cloning {model_name} from {url} ...")
    subprocess.run(["git", "clone", "--progress", url, saved])


def convert_hf_to_gguf(
    script_path: str,
    dir_raw_model: str,
    gguf_model_path: str,
    pad_vocab: bool = False,
):
    if pad_vocab:
        args = [
            "--outfile",
            gguf_model_path,
            # "--vocab-type",
            # "bpe",
            "--pad-vocab",
            dir_raw_model,
        ]
    else:
        args = ["--outfile", gguf_model_path, dir_raw_model]

    # convert.py for llama-3
    # args = ["--outfile", gguf_model_path, "--vocab-type", "bpe", dir_raw_model]

    res = subprocess.run(["python", script_path] + args)
    print(res)


def quantize_model(
    quantizer: str,
    f16_gguf_model_path: str,
    quantized_gguf_model_path: str,
    quant_type: str = "q4_0",
):
    print(f"[INFO] quantizer: {quantizer}")
    print(f"[INFO] quant_type: {quant_type}")
    print(f"[INFO] f16_gguf_model_path: {f16_gguf_model_path}")
    print(f"[INFO] quantized_model_filename: {quantized_gguf_model_path}")

    subprocess.run(
        [
            quantizer,
            f16_gguf_model_path,
            quantized_gguf_model_path,
            quant_type,
        ]
    )


def main():
    parser = argparse.ArgumentParser(description="Convert and quantize GGUF models.")
    parser.add_argument(
        "--full-name",
        type=str,
        required=True,
        help="Hugging Face model full name, e.g. `username/model_name`.",
    )
    parser.add_argument(
        "-s",
        "--saved-dir",
        type=str,
        default="models",
        help="The directory in which to save the model.",
    )
    parser.add_argument(
        "--enable-converter",
        action="store_true",
        help="Enable the converter. Note that `--converter` must also be specified.",
    )
    parser.add_argument(
        "-c",
        "--converter",
        type=str,
        help="The path to the converter. Note that `--enable-converter` must be specified if this option is used.",
    )
    parser.add_argument(
        "--pad-vocab",
        action="store_true",
        help="Add pad tokens when the model vocab expects more tokens than the tokenizer metadata provides. Note that `--enable-converter` must be specified.",
    )
    parser.add_argument(
        "--enable-quantizer",
        action="store_true",
        help="Enable the quantizer. Note that `--quantizer` must also be specified.",
    )
    parser.add_argument(
        "-q",
        "--quantizer",
        type=str,
        help="The path to the quantizer. Note that `--enable-quantizer` must be specified if this option is used.",
    )
    parser.add_argument(
        "-t",
        "--quant-type",
        type=str,
        default=None,
        help="The quantization type. Note that `--enable-quantizer` must be specified if this option is used.",
    )
    args = parser.parse_args()
    print(args)

    print("Download model ...")
    full_name = args.full_name
    username, model_name = full_name.split("/")
    saved_dir = args.saved_dir
    # try:
    #     download_hf_with_git(full_name, saved_dir)
    #     print(f"The raw model is saved in {saved_dir}.")
    # except Exception as e:
    #     print(f"Failed to download model. {e}")
    #     return

    # Compute the model paths up front so the quantizer step also works
    # when the converter step is skipped.
    raw_model_dir = f"{saved_dir}/{model_name}"
    gguf_model_dir = Path(raw_model_dir).parent / f"{model_name}-gguf"
    f16_gguf_model_path = gguf_model_dir / f"{model_name}-f16.gguf"

    if args.enable_converter:
        print("[CONVERTER] Convert model ...")
        converter = args.converter
        print(f"[CONVERTER] raw_model_dir: {raw_model_dir}")
        if not gguf_model_dir.exists():
            gguf_model_dir.mkdir()
        print(f"[CONVERTER] f16_gguf_model_path: {f16_gguf_model_path}")
        # try:
        #     convert_hf_to_gguf(
        #         converter,
        #         raw_model_dir,
        #         str(f16_gguf_model_path),
        #         args.pad_vocab,
        #     )
        #     print(f"The converted gguf model is saved in {f16_gguf_model_path}.")
        # except Exception as e:
        #     print(f"Failed to convert model. {e}")
        #     return

    if args.enable_quantizer:
        print("[QUANTIZER] Quantize model ...")
        quantizer = args.quantizer
        print(f"[QUANTIZER] quantizer: {quantizer}")

        if args.quant_type is not None:
            # Quantize to the single type requested on the command line.
            quant_type = args.quant_type
            quantized_gguf_model_path = (
                gguf_model_dir / f"{model_name}-{quant_type}.gguf"
            )
            print(f"[QUANTIZER] quant_type: {quant_type}")
            print(f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}")
            try:
                quantize_model(
                    quantizer,
                    str(f16_gguf_model_path),
                    str(quantized_gguf_model_path),
                    quant_type,
                )
                print(
                    f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                )
            except Exception as e:
                print(e)
                print("Failed to quantize model.")
                return
        else:
            # No `--quant-type` given: produce the default set of quantizations.
            for quant_type in [
                # "Q2_K",
                # "Q3_K_L",
                # "Q3_K_M",
                # "Q3_K_S",
                # "Q4_0",
                # "Q4_K_M",
                # "Q4_K_S",
                # "Q5_0",
                "Q5_K_M",
                # "Q5_K_S",
                "Q6_K",
                "Q8_0",
            ]:
                quantized_gguf_model_path = (
                    gguf_model_dir / f"{model_name}-{quant_type}.gguf"
                )
                print(f"[QUANTIZER] quant_type: {quant_type}")
                print(
                    f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}"
                )
                try:
                    quantize_model(
                        quantizer,
                        str(f16_gguf_model_path),
                        str(quantized_gguf_model_path),
                        quant_type,
                    )
                    print(
                        f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                    )
                except Exception as e:
                    print(e)
                    print("Failed to quantize model.")
                    return

    # # remove the raw model dir for saving space
    # print(f"The quantization is done. Remove {raw_model_dir}")
    # shutil.rmtree(raw_model_dir)

    print("Done.")


if __name__ == "__main__":
    main()