Spaces:

multimodalart
/

wan-2-2-first-last-frame

Running on Zero

App Files Files Community

wan-2-2-first-last-frame / custom_nodes /ComfyUI-GGUF /tools /convert.py

multimodalart HF Staff

Upload 83 files

708238a verified 3 months ago

raw

history blame

13.5 kB

	# (c) City96 || Apache-2.0 (apache.org/licenses/LICENSE-2.0)
	import os
	import gguf
	import torch
	import logging
	import argparse
	from tqdm import tqdm
	from safetensors.torch import load_file, save_file

	# float32/bfloat16 tensors with at most this many elements are kept in F32 by handle_tensors
	QUANTIZATION_THRESHOLD = 1024
	# minimum element count before handle_tensors considers rearranging a tensor to (N, 256)
	REARRANGE_THRESHOLD = 512
	# longest tensor name accepted; handle_tensors raises ValueError for anything longer
	MAX_TENSOR_NAME_LENGTH = 127
	# tensors with more dimensions than this are passed to handle_nd_tensor instead of being written
	MAX_TENSOR_DIMS = 4

	class ModelTemplate:
	    """Base description of a convertible architecture; subclasses override the fields below."""

	    # string describing architecture
	    arch = "invalid"
	    # whether to reshape tensors
	    shape_fix = False
	    # list of lists to match in state dict
	    keys_detect = []
	    # list of keys that should mark model as invalid for conversion
	    keys_banned = []
	    # list of keys that need to be kept in fp32 for some reason
	    keys_hiprec = []
	    # list of strings to ignore keys by when found
	    keys_ignore = []

	    def handle_nd_tensor(self, key, data):
	        """Reject a tensor that has too many dimensions; subclasses may override this.

	        Raises:
	            NotImplementedError: always, naming the key and its shape.
	        """
	        message = f"Tensor detected that exceeds dims supported by C++ code! ({key} @ {data.shape})"
	        raise NotImplementedError(message)

	class ModelFlux(ModelTemplate):
	    """Detection rules for the "flux" architecture.

	    The first detection key is also listed as banned, so a state dict that
	    contains it is recognised as flux but rejected by is_model_arch.
	    """

	    arch = "flux"
	    keys_detect = [
	        (
	            "transformer_blocks.0.attn.norm_added_k.weight",
	        ),
	        (
	            "double_blocks.0.img_attn.proj.weight",
	        ),
	    ]
	    keys_banned = [
	        "transformer_blocks.0.attn.norm_added_k.weight",
	    ]

	class ModelSD3(ModelTemplate):
	    """Detection rules for the "sd3" architecture.

	    The first detection key is also listed as banned, so a state dict that
	    contains it is recognised as sd3 but rejected by is_model_arch.
	    """

	    arch = "sd3"
	    keys_detect = [
	        (
	            "transformer_blocks.0.attn.add_q_proj.weight",
	        ),
	        (
	            "joint_blocks.0.x_block.attn.qkv.weight",
	        ),
	    ]
	    keys_banned = [
	        "transformer_blocks.0.attn.add_q_proj.weight",
	    ]

	class ModelAura(ModelTemplate):
	    """Detection rules for the "aura" architecture.

	    The second detection key is also listed as banned, so a state dict that
	    contains it is recognised as aura but rejected by is_model_arch.
	    """

	    arch = "aura"
	    keys_detect = [
	        (
	            "double_layers.3.modX.1.weight",
	        ),
	        (
	            "joint_transformer_blocks.3.ff_context.out_projection.weight",
	        ),
	    ]
	    keys_banned = [
	        "joint_transformer_blocks.3.ff_context.out_projection.weight",
	    ]

	class ModelHiDream(ModelTemplate):
	    """Detection rules for the "hidream" architecture (both keys must be present)."""

	    arch = "hidream"
	    keys_detect = [
	        (
	            "caption_projection.0.linear.weight",
	            "double_stream_blocks.0.block.ff_i.shared_experts.w3.weight",
	        ),
	    ]
	    # nn.parameter, can't load from BF16 ver
	    keys_hiprec = [
	        ".ff_i.gate.weight",
	        "img_emb.emb_pos",
	    ]

	class CosmosPredict2(ModelTemplate):
	    """Detection rules for the "cosmos" architecture (both keys must be present)."""

	    arch = "cosmos"
	    keys_detect = [
	        (
	            "blocks.0.mlp.layer1.weight",
	            "blocks.0.adaln_modulation_cross_attn.1.weight",
	        ),
	    ]
	    # substrings of keys that stay in F32
	    keys_hiprec = [
	        "pos_embedder",
	    ]
	    # substrings of keys that are dropped during conversion
	    keys_ignore = [
	        "_extra_state",
	        "accum_",
	    ]

	class ModelHyVid(ModelTemplate):
	    """Detection rules for the "hyvid" architecture, plus a workaround for >4D tensors."""

	    arch = "hyvid"
	    keys_detect = [
	        (
	            "double_blocks.0.img_attn_proj.weight",
	            "txt_in.individual_token_refiner.blocks.1.self_attn_qkv.weight",
	        ),
	    ]

	    def handle_nd_tensor(self, key, data):
	        """Save an over-dimensioned tensor to a side file in the working directory.

	        Args:
	            key: Name of the tensor in the state dict.
	            data: Tensor contents as a numpy array.

	        Raises:
	            RuntimeError: if the side file for this architecture already exists.
	        """
	        # hacky but don't have any better ideas
	        fix_path = f"./fix_5d_tensors_{self.arch}.safetensors"  # TODO: somehow get a path here??
	        if os.path.isfile(fix_path):
	            raise RuntimeError(f"5D tensor fix file already exists! {fix_path}")
	        side_dict = {key: torch.from_numpy(data)}
	        tqdm.write(f"5D key found in state dict! Manual fix required! - {key} {data.shape}")
	        save_file(side_dict, fix_path)

	class ModelWan(ModelHyVid):
	    """Detection rules for the "wan" architecture; inherits the >4D tensor handling of ModelHyVid."""

	    arch = "wan"
	    keys_detect = [
	        (
	            "blocks.0.self_attn.norm_q.weight",
	            "text_embedding.2.weight",
	            "head.modulation",
	        ),
	    ]
	    # nn.parameter, can't load from BF16 ver
	    keys_hiprec = [
	        ".modulation",
	    ]

	class ModelLTXV(ModelTemplate):
	    """Detection rules for the "ltxv" architecture (all three keys must be present)."""

	    arch = "ltxv"
	    keys_detect = [
	        (
	            "adaln_single.emb.timestep_embedder.linear_2.weight",
	            "transformer_blocks.27.scale_shift_table",
	            "caption_projection.linear_2.weight",
	        ),
	    ]
	    # nn.parameter, can't load from BF16 base quant
	    keys_hiprec = [
	        "scale_shift_table",
	    ]

	class ModelSDXL(ModelTemplate):
	    """Detection rules for the "sdxl" architecture; tensors may be rearranged (shape_fix)."""

	    arch = "sdxl"
	    shape_fix = True
	    keys_detect = [
	        (
	            "down_blocks.0.downsamplers.0.conv.weight",
	            "add_embedding.linear_1.weight",
	        ),
	        # Non-diffusers
	        (
	            "input_blocks.3.0.op.weight",
	            "input_blocks.6.0.op.weight",
	            "output_blocks.2.2.conv.weight",
	            "output_blocks.5.2.conv.weight",
	        ),
	        (
	            "label_emb.0.0.weight",
	        ),
	    ]

	class ModelSD1(ModelTemplate):
	    """Detection rules for the "sd1" architecture; tensors may be rearranged (shape_fix)."""

	    arch = "sd1"
	    shape_fix = True
	    keys_detect = [
	        (
	            "down_blocks.0.downsamplers.0.conv.weight",
	        ),
	        # Non-diffusers
	        (
	            "input_blocks.3.0.op.weight",
	            "input_blocks.6.0.op.weight",
	            "input_blocks.9.0.op.weight",
	            "output_blocks.2.1.conv.weight",
	            "output_blocks.5.2.conv.weight",
	            "output_blocks.8.2.conv.weight",
	        ),
	    ]

	class ModelLumina2(ModelTemplate):
	    """Detection rules for the "lumina2" architecture (both keys must be present)."""

	    arch = "lumina2"
	    keys_detect = [
	        (
	            "cap_embedder.1.weight",
	            "context_refiner.0.attention.qkv.weight",
	        ),
	    ]

	# Candidate architectures, tried in this order by detect_arch; the first
	# class whose detection keys match the state dict is used, so order matters.
	arch_list = [ModelFlux, ModelSD3, ModelAura, ModelHiDream, CosmosPredict2,
	ModelLTXV, ModelHyVid, ModelWan, ModelSDXL, ModelSD1, ModelLumina2]

	def is_model_arch(model, state_dict):
	    """Check whether `state_dict` matches the detection keys of `model`.

	    Args:
	        model: Architecture class (or instance) exposing `keys_detect`, an
	            iterable of key groups, and `keys_banned`, an iterable of keys.
	        state_dict: Mapping of tensor names; only key membership is tested.

	    Returns:
	        True if every key of at least one group in `keys_detect` is present,
	        otherwise False.

	    Raises:
	        AssertionError: if a group matched but the state dict also contains
	            one of the model's banned keys.
	    """
	    for match_list in model.keys_detect:
	        if not all(key in state_dict for key in match_list):
	            continue
	        # Only the first matching group is considered, as before.
	        if any(key in state_dict for key in model.keys_banned):
	            # Raised explicitly instead of via `assert` so the check is not
	            # stripped when Python runs with -O; the exception type is unchanged.
	            raise AssertionError(
	                "Model architecture not allowed for conversion! (i.e. reference VS diffusers format)"
	            )
	        return True
	    return False

	def detect_arch(state_dict):
	    """Return an instance of the first architecture in `arch_list` matching `state_dict`.

	    Args:
	        state_dict: Mapping of tensor names to tensors.

	    Returns:
	        A freshly constructed instance of the matching architecture class.

	    Raises:
	        AssertionError: if no architecture matches, or (from is_model_arch)
	            if the matching architecture is banned for conversion.
	    """
	    for arch in arch_list:
	        if is_model_arch(arch, state_dict):
	            return arch()
	    # Raised explicitly instead of via `assert` so that an unknown model can
	    # never fall through and return None when Python runs with -O.
	    raise AssertionError("Unknown model architecture!")

	def parse_args():
	    """Parse the command line and validate that the source file exists.

	    Returns:
	        The argparse namespace with `src` (required) and `dst` (optional, may be None).

	    Exits via `parser.error` (SystemExit, status 2) when `--src` does not
	    point to an existing file.
	    """
	    parser = argparse.ArgumentParser(description="Generate F16 GGUF files from single UNET")
	    parser.add_argument("--src", required=True, help="Source model ckpt file.")
	    parser.add_argument("--dst", help="Output unet gguf file.")
	    args = parser.parse_args()

	    if not os.path.isfile(args.src):
	        # --src is required, so reaching this point means a path was given
	        # but it is not an existing file; say so and name the path.
	        parser.error(f"Source file not found: '{args.src}'")

	    return args

	def strip_prefix(state_dict):
	    """Remove a known leading prefix from the keys of `state_dict`.

	    A "mixed" prefix ("model.diffusion_model." or "model.") is selected when
	    any key starts with it; a "uniform" prefix ("net.") is selected only when
	    every key starts with it.

	    Args:
	        state_dict: Mapping of tensor names to tensors.

	    Returns:
	        A new dict containing only the keys that start with the selected
	        prefix, with that prefix removed; or `state_dict` itself, unchanged,
	        when no prefix applies.
	    """
	    keys = list(state_dict.keys())

	    # prefix for mixed state dict
	    prefix = None
	    for pfx in ["model.diffusion_model.", "model."]:
	        if any(k.startswith(pfx) for k in keys):
	            prefix = pfx
	            break

	    # prefix for uniform state dict (an empty dict has no prefix to strip)
	    if prefix is None and keys:
	        for pfx in ["net."]:
	            if all(k.startswith(pfx) for k in keys):
	                prefix = pfx
	                break

	    if prefix is None:
	        logging.debug("State dict has no prefix")
	        return state_dict

	    logging.info(f"State dict prefix found: '{prefix}'")
	    # Match and strip at the START of the key only. A substring test plus
	    # str.replace would also rewrite later occurrences, e.g. turning
	    # "net.subnet.weight" into "subweight", and would keep unrelated keys
	    # that merely contain the prefix somewhere in the middle.
	    cut = len(prefix)
	    return {k[cut:]: v for k, v in state_dict.items() if k.startswith(prefix)}

	def load_state_dict(path):
	    """Load a checkpoint from `path` and return its state dict with any known prefix stripped.

	    Files ending in .ckpt/.pt/.bin/.pth go through torch.load (weights only,
	    on CPU); anything else is read with safetensors' load_file.

	    Raises:
	        RuntimeError: if a torch checkpoint yields fewer than 20 entries
	            after unwrapping a "model"/"module" sub-dict.
	    """
	    torch_suffixes = (".ckpt", ".pt", ".bin", ".pth")
	    if not path.endswith(torch_suffixes):
	        return strip_prefix(load_file(path))

	    state_dict = torch.load(path, map_location="cpu", weights_only=True)
	    # some checkpoints nest the weights one level down
	    for subkey in ("model", "module"):
	        if subkey in state_dict:
	            state_dict = state_dict[subkey]
	            break
	    if len(state_dict) < 20:
	        raise RuntimeError(f"pt subkey load failed: {state_dict.keys()}")

	    return strip_prefix(state_dict)

	def handle_tensors(writer, state_dict, model_arch):
	    """Convert every tensor in `state_dict` and queue it on the GGUF `writer`.

	    Args:
	        writer: GGUF writer; receives `add_tensor` for each converted tensor
	            and `add_array` for the original shape of rearranged tensors.
	        state_dict: Mapping of tensor names to torch tensors.
	        model_arch: Architecture instance providing `keys_ignore`,
	            `keys_hiprec`, `shape_fix` and `handle_nd_tensor`.

	    Raises:
	        ValueError: if any tensor name is longer than MAX_TENSOR_NAME_LENGTH.
	    """
	    key_lengths = [(name, len(name)) for name in state_dict.keys()]
	    if not key_lengths:
	        return

	    max_name_len = max(length for _, length in key_lengths)
	    if max_name_len > MAX_TENSOR_NAME_LENGTH:
	        # report the offenders longest-first
	        offenders = sorted(
	            (item for item in key_lengths if item[1] > MAX_TENSOR_NAME_LENGTH),
	            key=lambda item: item[1],
	            reverse=True,
	        )
	        bad_list = ", ".join(f"{key!r} ({namelen})" for key, namelen in offenders)
	        raise ValueError(f"Can only handle tensor names up to {MAX_TENSOR_NAME_LENGTH} characters. Tensors exceeding the limit: {bad_list}")

	    # getattr with a dummy fallback: this is so we don't break torch 2.0.X
	    fp8_dtypes = [getattr(torch, "float8_e4m3fn", "_invalid"), getattr(torch, "float8_e5m2", "_invalid")]
	    f32_candidates = (torch.float32, torch.bfloat16)
	    qtypes = gguf.GGMLQuantizationType

	    for key, tensor in tqdm(state_dict.items()):
	        old_dtype = tensor.dtype

	        if any(x in key for x in model_arch.keys_ignore):
	            tqdm.write(f"Filtering ignored key: '{key}'")
	            continue

	        # move to numpy, widening dtypes that are converted before export
	        if old_dtype == torch.bfloat16:
	            data = tensor.to(torch.float32).numpy()
	        elif old_dtype in fp8_dtypes:
	            data = tensor.to(torch.float16).numpy()
	        else:
	            data = tensor.numpy()

	        n_dims = data.ndim

	        # The max no. of dimensions that can be handled by the quantization code is 4
	        if n_dims > MAX_TENSOR_DIMS:
	            model_arch.handle_nd_tensor(key, data)
	            continue  # needs to be added back later

	        # number of parameters (AKA elements) in this tensor
	        n_params = int(data.size)

	        keep_f32 = old_dtype in f32_candidates and (
	            # one-dimensional tensors should be kept in F32
	            # also speeds up inference due to not dequantizing
	            n_dims == 1
	            # very small tensors
	            or n_params <= QUANTIZATION_THRESHOLD
	            # tensors that require max precision
	            or any(x in key for x in model_arch.keys_hiprec)
	        )
	        if keep_f32:
	            data_qtype = qtypes.F32
	        elif old_dtype == torch.bfloat16:
	            data_qtype = qtypes.BF16
	        else:
	            data_qtype = qtypes.F16

	        needs_rearrange = (
	            model_arch.shape_fix                    # NEVER reshape for models such as flux
	            and n_dims > 1                          # Skip one-dimensional tensors
	            and n_params >= REARRANGE_THRESHOLD     # Only rearrange tensors meeting the size requirement
	            and n_params % 256 == 0                 # Rearranging only makes sense if total elements is divisible by 256
	            and data.shape[-1] % 256 != 0           # Only need to rearrange if the last dimension is not divisible by 256
	        )
	        if needs_rearrange:
	            orig_shape = data.shape
	            data = data.reshape(n_params // 256, 256)
	            writer.add_array(f"comfy.gguf.orig_shape.{key}", tuple(int(dim) for dim in orig_shape))

	        try:
	            data = gguf.quants.quantize(data, data_qtype)
	        except (AttributeError, gguf.QuantError) as e:
	            tqdm.write(f"falling back to F16: {e}")
	            data_qtype = qtypes.F16
	            data = gguf.quants.quantize(data, data_qtype)

	        new_name = key  # do we need to rename?

	        shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
	        tqdm.write(f"{f'%-{max_name_len + 4}s' % f'{new_name}'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

	        writer.add_tensor(new_name, data, raw_dtype=data_qtype)

	def convert_file(path, dst_path=None, interact=True, overwrite=False):
	    """Convert a single model checkpoint into a GGUF file.

	    Args:
	        path: Source checkpoint path, passed to load_state_dict.
	        dst_path: Output path. When None it is derived from `path` as
	            "<path without extension>-<ftype>.gguf"; a literal "{ftype}" in a
	            given path is replaced with the detected file type name.
	        interact: When the output already exists and `overwrite` is False,
	            wait for the user on stdin instead of raising.
	        overwrite: When True, skip the existing-output check.

	    Returns:
	        Tuple `(dst_path, model_arch)`: the path written and the detected
	        architecture instance.

	    Raises:
	        OSError: if the output exists while both `overwrite` and `interact`
	            are False.
	    """
	    from collections import Counter  # local import keeps the file's import block untouched

	    # load & run model detection logic
	    state_dict = load_state_dict(path)
	    model_arch = detect_arch(state_dict)
	    logging.info(f"* Architecture detected from input: {model_arch.arch}")

	    # detect & set dtype for output file: the most frequent tensor dtype wins.
	    # Counter counts in one pass and breaks ties by first occurrence, so the
	    # result is deterministic.
	    main_dtype = Counter(x.dtype for x in state_dict.values()).most_common(1)[0][0]

	    if main_dtype == torch.bfloat16:
	        ftype_name = "BF16"
	        ftype_gguf = gguf.LlamaFileType.MOSTLY_BF16
	    else:
	        ftype_name = "F16"
	        ftype_gguf = gguf.LlamaFileType.MOSTLY_F16

	    if dst_path is None:
	        dst_path = f"{os.path.splitext(path)[0]}-{ftype_name}.gguf"
	    elif "{ftype}" in dst_path:  # lcpp logic
	        dst_path = dst_path.replace("{ftype}", ftype_name)

	    if os.path.isfile(dst_path) and not overwrite:
	        if interact:
	            input("Output exists enter to continue or ctrl+c to abort!")
	        else:
	            raise OSError("Output exists and overwriting is disabled!")

	    # handle actual file
	    writer = gguf.GGUFWriter(path=None, arch=model_arch.arch)
	    writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
	    if ftype_gguf is not None:
	        writer.add_file_type(ftype_gguf)

	    handle_tensors(writer, state_dict, model_arch)
	    try:
	        writer.write_header_to_file(path=dst_path)
	        writer.write_kv_data_to_file()
	        writer.write_tensors_to_file(progress=True)
	    finally:
	        # close even when writing fails so the output handle is not leaked
	        writer.close()

	    # same path that the hyvid/wan handle_nd_tensor writes to
	    fix = f"./fix_5d_tensors_{model_arch.arch}.safetensors"
	    if os.path.isfile(fix):
	        logging.warning(f"\n### Warning! Fix file found at '{fix}'")
	        logging.warning(" you most likely need to run 'fix_5d_tensors.py' after quantization.")

	    return dst_path, model_arch

	if __name__ == "__main__":
	    # Without explicit configuration the root logger stays at WARNING, so the
	    # logging.info progress messages from the conversion would never be shown.
	    logging.basicConfig(level=logging.INFO)
	    args = parse_args()
	    convert_file(args.src, args.dst)