#!/usr/bin/env python3
# 1/17/2024
# Charles O. Goddard
# https://huggingface.co/chargoddard/internlm2-7b-llama/raw/main/convert_weights.py
"""Convert internlm2 weights to Llama format."""
import json
import os

import einops
import tqdm
from mergekit.io import LazyTensorLoader, TensorWriter
from mergekit.common import ModelReference
from transformers import LlamaTokenizer
MODEL_IN = "raw weights"
OUT_PATH = "llamafied weights"
model_ref = ModelReference.parse(MODEL_IN)
cfg = model_ref.config(trust_remote_code=True)
head_dim = cfg.hidden_size // cfg.num_attention_heads
num_key_value_groups = cfg.num_attention_heads // cfg.num_key_value_heads
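# e.g. for internlm2-7b (hidden_size=4096, 32 attention heads, 8 KV heads) this
# gives head_dim=128 and 4 query heads per KV head (illustrative numbers; the
# real values come from the config loaded above)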
loader = LazyTensorLoader(model_ref.tensor_index(), lazy_unpickle=True)
writer = TensorWriter(OUT_PATH)
SIMPLE_REPLACEMENTS = {
    "feed_forward.w1": "mlp.gate_proj",
    "feed_forward.w2": "mlp.down_proj",
    "feed_forward.w3": "mlp.up_proj",
    "attention.wo": "self_attn.o_proj",
    "ffn_norm": "post_attention_layernorm",
    "attention_norm": "input_layernorm",
    "tok_embeddings": "embed_tokens",
    "output.weight": "lm_head.weight",
}
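# e.g. "model.layers.0.feed_forward.w1.weight" -> "model.layers.0.mlp.gate_proj.weight"
# (illustrative name; the actual prefixes come from the checkpoint's own tensor index)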
for tensor_name in tqdm.tqdm(loader.index.tensor_paths):
    tensor = loader.get_tensor(tensor_name)
    if "attention.wqkv" in tensor_name:
        # make me think about tensor shapes will you >:(
        # ((cfg.num_attention_heads + 2 * cfg.num_key_value_heads) * head_dim, cfg.hidden_size) x (batch_sz, sq_len, cfg.hidden_size)
        # -> (batch_sz, sq_len, (cfg.num_attention_heads + 2 * cfg.num_key_value_heads) * head_dim)
        # qkv_states = rearrange(
        #     qkv_states,
        #     "b q (h gs d) -> b q h gs d",
        #     gs=2 + self.num_key_value_groups,
        #     d=self.head_dim,
        # )
        # -> (batch_sz, sq_len, h, 2 + self.num_key_value_groups, head_dim)
        qkv_vecs = einops.rearrange(
            tensor, "(h gs d) z -> h gs d z", gs=2 + num_key_value_groups, d=head_dim
        )
        q_proj = (
            qkv_vecs[:, :num_key_value_groups, ...]
            .reshape(-1, cfg.hidden_size)
            .contiguous()
        )
        k_proj = qkv_vecs[:, -2, ...].reshape(-1, cfg.hidden_size).contiguous()
        v_proj = qkv_vecs[:, -1, ...].reshape(-1, cfg.hidden_size).contiguous()
        assert k_proj.shape == v_proj.shape
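        # Extra sanity checks (added as an assumption about the expected GQA
        # layout): Q should have num_attention_heads * head_dim rows, K and V
        # num_key_value_heads * head_dim rows.
        assert q_proj.shape == (cfg.num_attention_heads * head_dim, cfg.hidden_size)
        assert k_proj.shape == (cfg.num_key_value_heads * head_dim, cfg.hidden_size)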
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.q_proj"),
            q_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.k_proj"),
            k_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.v_proj"),
            v_proj,
            clone=True,
        )
        continue

    out_name = tensor_name
    for pattern, sub in SIMPLE_REPLACEMENTS.items():
        if pattern in out_name:
            out_name = out_name.replace(pattern, sub)
    writer.save_tensor(out_name, tensor)
writer.finalize()
cfg_dict = json.loads(cfg.to_json_string())
del cfg_dict["auto_map"]
cfg_dict["architectures"] = ["LlamaForCausalLM"]
cfg_dict["model_type"] = "llama"
if "rope_scaling" in cfg_dict and cfg_dict["rope_scaling"]["factor"] == 1.0:
del cfg_dict["rope_scaling"]
with open(os.path.join(OUT_PATH, "config.json"), "w", encoding="utf-8") as fp:
json.dump(cfg_dict, fp, indent=2)
# InternLMTokenizer differences from LlamaTokenizer:
# 1. clean_up_tokenization() is hardcoded to always be called
# 2. may prepend a space to some tokens that LlamaTokenizer doesn't when they're the first token
# 1 is easy to fix, 2... is not important
tok = LlamaTokenizer.from_pretrained(MODEL_IN, trust_remote_code=False, legacy=True)
tok.clean_up_tokenization_spaces = True
tok.save_pretrained(OUT_PATH)
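
# Optional sanity check -- a minimal sketch, assuming transformers can read the
# freshly written OUT_PATH directory: confirm that the converted config and
# tokenizer parse with the stock (non-remote-code) Auto classes, without
# loading any weights.
from transformers import AutoConfig, AutoTokenizer

check_cfg = AutoConfig.from_pretrained(OUT_PATH)
assert check_cfg.model_type == "llama", check_cfg.model_type
check_tok = AutoTokenizer.from_pretrained(OUT_PATH)
print(f"converted config/tokenizer load OK, vocab size {check_tok.vocab_size}")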