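"""Download a Hugging Face model, convert it to GGUF, and quantize it.

A sketch of an invocation (the script name and tool paths below are
illustrative, not part of the original repo):

    python convert_and_quantize.py \
        --full-name username/model_name \
        --enable-converter -c /path/to/llama.cpp/convert.py \
        --enable-quantizer -q /path/to/llama.cpp/quantize \
        -t Q5_K_M
"""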
import argparse
import shutil  # used by the (currently disabled) cleanup step at the end of main()
import subprocess
from pathlib import Path

from git import Repo  # provided by the GitPython package


def clone_hf_with_git(username: str, model_name: str, saved_dir: str):
    """Clone a model repo from Hugging Face over HTTPS using GitPython."""
    full_model_name = f"{username}/{model_name}"
    url = f"https://huggingface.co/{full_model_name}"
    saved = f"{saved_dir}/{model_name}"
    # Register the Git LFS filters before cloning; without them the large
    # weight files come down as pointer stubs instead of real data.
    subprocess.run(["git", "lfs", "install"], check=True)
    print(f"[INFO] Cloning {model_name} from {url} ...")
    Repo.clone_from(url, saved)
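
# A minimal usage sketch (the names below are placeholders, not a real repo):
#
#     clone_hf_with_git("username", "model_name", "models")
#
# which clones https://huggingface.co/username/model_name into models/model_name.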


def download_hf_with_git(full_name: str, saved_dir: str):
    """Clone a model repo from Hugging Face over SSH using the git CLI."""
    model_name = full_name.split("/")[1]
    url = f"git@hf.co:{full_name}"
    saved = f"{saved_dir}/{model_name}"
    # Register the Git LFS filters before cloning.
    subprocess.run(["git", "lfs", "install"], check=True)
    print(f"[INFO] Cloning {model_name} from {url} ...")
    subprocess.run(["git", "clone", "--progress", url, saved], check=True)


def convert_hf_to_gguf(
    script_path: str,
    dir_raw_model: str,
    gguf_model_path: str,
    pad_vocab: bool = False,
):
    """Run the conversion script to turn a raw HF checkpoint into an f16 GGUF file."""
    if pad_vocab:
        args = [
            "--outfile",
            gguf_model_path,
            # "--vocab-type",
            # "bpe",
            "--pad-vocab",
            dir_raw_model,
        ]
    else:
        args = ["--outfile", gguf_model_path, dir_raw_model]
    # For llama-3 style tokenizers, convert.py needs the BPE vocab type:
    # args = ["--outfile", gguf_model_path, "--vocab-type", "bpe", dir_raw_model]
    res = subprocess.run(["python", script_path] + args)
    print(res)
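
# A sketch, assuming `script_path` points at llama.cpp's convert.py (the exact
# script name and flags vary across llama.cpp versions):
#
#     convert_hf_to_gguf(
#         "/path/to/llama.cpp/convert.py",
#         "models/model_name",
#         "models/model_name-gguf/model_name-f16.gguf",
#     )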


def quantize_model(
    quantizer: str,
    f16_gguf_model_path: str,
    quantized_gguf_model_path: str,
    quant_type: str = "q4_0",
):
    """Invoke the quantizer binary to produce a quantized GGUF from the f16 one."""
    print(f"[INFO] quantizer: {quantizer}")
    print(f"[INFO] quant_type: {quant_type}")
    print(f"[INFO] f16_gguf_model_path: {f16_gguf_model_path}")
    print(f"[INFO] quantized_model_filename: {quantized_gguf_model_path}")
    # `check=True` raises CalledProcessError on a non-zero exit code, so the
    # try/except around the callers in main() can actually catch failures.
    subprocess.run(
        [
            quantizer,
            f16_gguf_model_path,
            quantized_gguf_model_path,
            quant_type,
        ],
        check=True,
    )
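
# A sketch, assuming `quantizer` is llama.cpp's quantize binary (called
# `llama-quantize` in newer builds):
#
#     quantize_model(
#         "/path/to/llama.cpp/quantize",
#         "models/model_name-gguf/model_name-f16.gguf",
#         "models/model_name-gguf/model_name-Q5_K_M.gguf",
#         "Q5_K_M",
#     )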


def main():
    parser = argparse.ArgumentParser(description="Convert and quantize gguf models.")
    parser.add_argument(
        "--full-name",
        type=str,
        required=True,
        help="Hugging Face model full name, e.g. `username/model_name`.",
    )
parser.add_argument(
"-s",
"--saved-dir",
type=str,
default="models",
help="The directory to save the model.",
)
parser.add_argument(
"--enable-converter",
action="store_true",
help="Enable the converter. Notice that `--converter` must be specified.",
)
parser.add_argument(
"-c",
"--converter",
type=str,
help="The path to the converter. Notice that `--enable-converter` must be specified if use this option.",
)
parser.add_argument(
"--pad-vocab",
action="store_true",
help="Enable adding pad tokens when model vocab expects more than tokenizer metadata provides. Notice that `--enable-converter` must be specified.",
)
parser.add_argument(
"--enable-quantizer",
action="store_true",
help="Enable the quantizer. Notice that `--quantizer` must be specified.",
)
parser.add_argument(
"-q",
"--quantizer",
type=str,
help="The path to the quantizer. Notice that `--enable-quantizer` must be specified if use this option.",
)
parser.add_argument(
"-t",
"--quant-type",
type=str,
default=None,
help="The quantization type. Notice that `--enable-quantizer` must be specified if use this option.",
)
args = parser.parse_args()
print(args)
print("Download model ...")
full_name = args.full_name
username, model_name = full_name.split("/")
saved_dir = args.saved_dir
    # The download step is currently disabled; the raw model is expected to
    # already exist under `saved_dir`.
    # try:
    #     download_hf_with_git(full_name, saved_dir)
    #     print(f"The raw model is saved in {saved_dir}.")
    # except Exception as e:
    #     print(f"Failed to download model. {e}")
    #     return
    if args.enable_converter:
        print("[CONVERTER] Convert model ...")
        converter = args.converter
        raw_model_dir = f"{saved_dir}/{model_name}"
        print(f"[CONVERTER] raw_model_dir: {raw_model_dir}")
        gguf_model_dir = Path(raw_model_dir).parent / f"{model_name}-gguf"
        gguf_model_dir.mkdir(parents=True, exist_ok=True)
        f16_gguf_model_path = gguf_model_dir / f"{model_name}-f16.gguf"
        print(f"[CONVERTER] f16_gguf_model_path: {f16_gguf_model_path}")
        # The conversion step is currently disabled; only the output paths are
        # computed so the quantizer below knows where to find the f16 GGUF file.
        # try:
        #     convert_hf_to_gguf(
        #         converter,
        #         raw_model_dir,
        #         str(f16_gguf_model_path),
        #         args.pad_vocab,
        #     )
        #     print(f"The converted gguf model is saved in {f16_gguf_model_path}.")
        # except Exception as e:
        #     print(f"Failed to convert model. {e}")
        #     return
        if args.enable_quantizer:
            print("[QUANTIZER] Quantize model ...")
            quantizer = args.quantizer
            print(f"[QUANTIZER] quantizer: {quantizer}")

            if args.quant_type is not None:
                quant_type = args.quant_type
                quantized_gguf_model_path = (
                    gguf_model_dir / f"{model_name}-{quant_type}.gguf"
                )
                print(f"[QUANTIZER] quant_type: {quant_type}")
                print(
                    f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}"
                )
                try:
                    quantize_model(
                        quantizer,
                        str(f16_gguf_model_path),
                        str(quantized_gguf_model_path),
                        quant_type,
                    )
                    print(
                        f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                    )
                except Exception as e:
                    print(e)
                    print("Failed to quantize model.")
                    return
            else:
                # Default set of quantization types; re-enable the commented-out
                # entries as needed.
                for quant_type in [
                    # "Q2_K",
                    # "Q3_K_L",
                    # "Q3_K_M",
                    # "Q3_K_S",
                    # "Q4_0",
                    # "Q4_K_M",
                    # "Q4_K_S",
                    # "Q5_0",
                    "Q5_K_M",
                    # "Q5_K_S",
                    "Q6_K",
                    "Q8_0",
                ]:
                    quantized_gguf_model_path = (
                        gguf_model_dir / f"{model_name}-{quant_type}.gguf"
                    )
                    print(f"[QUANTIZER] quant_type: {quant_type}")
                    print(
                        f"[QUANTIZER] quantized_model_filename: {quantized_gguf_model_path}"
                    )
                    try:
                        quantize_model(
                            quantizer,
                            str(f16_gguf_model_path),
                            str(quantized_gguf_model_path),
                            quant_type,
                        )
                        print(
                            f"The quantized gguf model is saved in {quantized_gguf_model_path}."
                        )
                    except Exception as e:
                        print(e)
                        print("Failed to quantize model.")
                        return
        # # Remove the raw model dir to save disk space.
        # print(f"The quantization is done. Remove {raw_model_dir}")
        # shutil.rmtree(raw_model_dir)

    print("Done.")


if __name__ == "__main__":
    main()