"""Convert internlm2 weights to Llama format."""

import json
import os

import einops
import tqdm
from transformers import LlamaTokenizer

from mergekit.common import ModelReference
from mergekit.io import LazyTensorLoader, TensorWriter

|
MODEL_IN = "internlm/internlm2-20b" |
|
OUT_PATH = "./internlm2-20b-llama" |
|
|
|
model_ref = ModelReference.parse(MODEL_IN) |
|
cfg = model_ref.config(trust_remote_code=True) |
|
head_dim = cfg.hidden_size // cfg.num_attention_heads |
|
num_key_value_groups = cfg.num_attention_heads // cfg.num_key_value_heads |
|
loader = LazyTensorLoader(model_ref.tensor_index(), lazy_unpickle=True) |
|
writer = TensorWriter(OUT_PATH) |
|
|
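# Tensors that map 1:1 onto Llama parameters and only need renaming.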
SIMPLE_REPLACEMENTS = {
    "feed_forward.w1": "mlp.gate_proj",
    "feed_forward.w2": "mlp.down_proj",
    "feed_forward.w3": "mlp.up_proj",
    "attention.wo": "self_attn.o_proj",
    "ffn_norm": "post_attention_layernorm",
    "attention_norm": "input_layernorm",
    "tok_embeddings": "embed_tokens",
    "output.weight": "lm_head.weight",
}

for tensor_name in tqdm.tqdm(loader.index.tensor_paths):
    tensor = loader.get_tensor(tensor_name)
    if "attention.wqkv" in tensor_name:
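        # InternLM2 fuses the Q, K, and V projections into a single wqkv
        # tensor, grouped by KV head: each of the cfg.num_key_value_heads
        # groups holds num_key_value_groups query heads followed by one key
        # head and one value head. Unpack that grouping so the slices below
        # can pull out separate q/k/v projections.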
        qkv_vecs = einops.rearrange(
            tensor, "(h gs d) z -> h gs d z", gs=2 + num_key_value_groups, d=head_dim
        )
        q_proj = (
            qkv_vecs[:, :num_key_value_groups, ...]
            .reshape(-1, cfg.hidden_size)
            .contiguous()
        )
        k_proj = qkv_vecs[:, -2, ...].reshape(-1, cfg.hidden_size).contiguous()
        v_proj = qkv_vecs[:, -1, ...].reshape(-1, cfg.hidden_size).contiguous()
        assert k_proj.shape == v_proj.shape

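        # Write each unpacked projection under its Llama parameter name.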
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.q_proj"),
            q_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.k_proj"),
            k_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.v_proj"),
            v_proj,
            clone=True,
        )
        continue

    out_name = tensor_name
    for pattern, sub in SIMPLE_REPLACEMENTS.items():
        if pattern in out_name:
            out_name = out_name.replace(pattern, sub)
    writer.save_tensor(out_name, tensor)
writer.finalize()

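# Patch the config so transformers loads the result as a plain Llama model
# rather than through InternLM2's remote code.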
cfg_dict = json.loads(cfg.to_json_string())
del cfg_dict["auto_map"]
cfg_dict["architectures"] = ["LlamaForCausalLM"]
cfg_dict["model_type"] = "llama"
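# A rope_scaling factor of 1.0 is a no-op, so drop the entry rather than
# carry it into the Llama config.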
if "rope_scaling" in cfg_dict and cfg_dict["rope_scaling"]["factor"] == 1.0:
    del cfg_dict["rope_scaling"]
with open(os.path.join(OUT_PATH, "config.json"), "w", encoding="utf-8") as fp:
    json.dump(cfg_dict, fp, indent=2)

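# The InternLM2 tokenizer is sentencepiece-based, so the stock (slow)
# LlamaTokenizer can load it without trust_remote_code.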
tok = LlamaTokenizer.from_pretrained(MODEL_IN, trust_remote_code=False, legacy=True)
tok.clean_up_tokenization_spaces = True
tok.save_pretrained(OUT_PATH)