How to convert a llama2.c model into the Hugging Face model format?

#4 opened by win10

Can you provide a way to do this?


import logging

import torch
from transformers import AutoModelForCausalLM, LlamaConfig

# Transformer, ModelArguments, and Tokenizer come from the llama2.c training
# code -- in karpathy's repo these are model.py's Transformer/ModelArgs and
# tokenizer.py's Tokenizer; rename the imports to match your own modules.
from model import ModelArgs as ModelArguments, Transformer
from tokenizer import Tokenizer

logger = logging.getLogger(__name__)


def save_hf_model(model: Transformer, model_args: ModelArguments, tokenizer: Tokenizer,
                  model_path: str, dtype=torch.float16):
    """Convert a llama2.c Transformer into a Hugging Face Llama checkpoint."""
    logger.info("converting the model to Hugging Face format")
    config = LlamaConfig(
        vocab_size=model_args.vocab_size,
        hidden_size=model_args.dim,
        # llama2.c's Tokenizer names these bos_id/eos_id/pad_id; adjust if needed
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        max_position_embeddings=model_args.max_seq_len,
        num_attention_heads=model_args.n_heads,
        num_hidden_layers=model_args.n_layers,
        model_type="llama",
        torch_dtype=dtype,
        intermediate_size=model_args.hidden_dim,
        hidden_act="silu",
        # llama2.c defaults norm_eps to 1e-5, while LlamaConfig defaults to 1e-6
        rms_norm_eps=model_args.norm_eps,
    )
    hf_state_dict = {}
    hf_state_dict["model.embed_tokens.weight"] = model.tok_embeddings.weight.clone()
    hf_state_dict["model.norm.weight"] = model.norm.weight.clone()

    # llama2.c applies RoPE to interleaved (real, imag) pairs, while HF's Llama
    # rotates the two halves of each head; this is the same row permutation
    # that transformers' convert_llama_weights_to_hf.py applies to q/k weights.
    def permute_forward(w, n_heads=model_args.n_heads, dim1=model_args.dim, dim2=model_args.dim):
        return w.view(n_heads, dim1 // n_heads // 2, 2, dim2).transpose(1, 2).reshape(dim1, dim2)

    for layer in model.layers:
        i = layer.layer_id
        hf_state_dict[f'model.layers.{i}.input_layernorm.weight'] = layer.attention_norm.weight.clone()
        hf_state_dict[f'model.layers.{i}.self_attn.q_proj.weight'] = permute_forward(layer.attention.wq.weight).clone()
        # NOTE: this assumes n_kv_heads == n_heads; for a GQA checkpoint, also set
        # num_key_value_heads in the config and permute wk with n_heads=n_kv_heads
        # and dim1=dim * n_kv_heads // n_heads instead.
        hf_state_dict[f'model.layers.{i}.self_attn.k_proj.weight'] = permute_forward(layer.attention.wk.weight).clone()
        hf_state_dict[f'model.layers.{i}.self_attn.v_proj.weight'] = layer.attention.wv.weight.clone()
        hf_state_dict[f'model.layers.{i}.self_attn.o_proj.weight'] = layer.attention.wo.weight.clone()
        hf_state_dict[f'model.layers.{i}.post_attention_layernorm.weight'] = layer.ffn_norm.weight.clone()
        hf_state_dict[f'model.layers.{i}.mlp.gate_proj.weight'] = layer.feed_forward.w1.weight.clone()
        hf_state_dict[f'model.layers.{i}.mlp.down_proj.weight'] = layer.feed_forward.w2.weight.clone()
        hf_state_dict[f'model.layers.{i}.mlp.up_proj.weight'] = layer.feed_forward.w3.weight.clone()
    hf_state_dict["lm_head.weight"] = model.output.weight.clone()

    # Passing pretrained_model_name_or_path=None together with config= and
    # state_dict= builds the model directly from the in-memory weights.
    hf_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=None, config=config,
                                                    state_dict=hf_state_dict, torch_dtype=dtype)
    hf_model.save_pretrained(model_path)
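
For reference, here is a minimal sketch of how you might drive that function, assuming karpathy's llama2.c repo layout and a checkpoint written by its train.py. The file paths, the "model"/"model_args" checkpoint keys, and the "_orig_mod." prefix handling mirror llama2.c's own loading code, but treat them as assumptions and adapt them to your setup:

import torch
from model import ModelArgs, Transformer
from tokenizer import Tokenizer

ckpt_path = "out/ckpt.pt"           # hypothetical path to a train.py checkpoint
tokenizer_path = "tokenizer.model"  # hypothetical path to the sentencepiece model

# train.py saves the constructor args under "model_args" and weights under "model"
checkpoint = torch.load(ckpt_path, map_location="cpu")
model_args = ModelArgs(**checkpoint["model_args"])
model = Transformer(model_args)

# strip the "_orig_mod." prefix that torch.compile adds to state-dict keys
state_dict = {k.removeprefix("_orig_mod."): v for k, v in checkpoint["model"].items()}
model.load_state_dict(state_dict)
model.eval()

tokenizer = Tokenizer(tokenizer_path)
save_hf_model(model, model_args, tokenizer, "llama2c-hf", dtype=torch.float16)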

Can you provide the full script?
I don't really understand.
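
Until someone posts a full script, a quick way to sanity-check the converted folder is to load it back with transformers. Note that save_hf_model only writes the model weights and config, so you still need to save a tokenizer next to it; the tokenizer.model path and the output directory below are assumptions:

import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer

model_path = "llama2c-hf"  # the directory passed to save_hf_model

# build a HF tokenizer from the sentencepiece file llama2.c trained with
tokenizer = LlamaTokenizer(vocab_file="tokenizer.model")
tokenizer.save_pretrained(model_path)

model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)
model.eval()

inputs = tokenizer("Once upon a time", return_tensors="pt")
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))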
