The files in the zip archive seem to be different from the files in the repositories named with the -hf suffix. What is the difference between the two?

#5
by wkh666 - opened

Hello, I'm new to this project. The files in the zip archive seem to be different from the files in the repositories named with -hf. What is the difference between the two? Thanks!

Knowledge Engineering Group (KEG) & Data Mining at Tsinghua University org

The weights in the zip are the weights trained with sat (SwissArmyTransformer), and they pair with the model implementation in that codebase. Later, to make things easier to use, we converted both the model implementation and the weights into a HuggingFace-compatible format, which is what you see in the -hf repositories.
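In practical terms, the two checkpoints hold the same parameters under different key names. A minimal sketch for inspecting both (the directory names are placeholders for wherever you unpacked the zip and downloaded the -hf repository):

import os
import torch

# sat checkpoint from the zip: keys look like 'mixins.eva....' / 'transformer.layers....'
sat_state = torch.load(
    os.path.join('cogvlm-sat', '1', 'mp_rank_00_model_states.pt'), map_location='cpu'
)['module']
# HF checkpoint from the -hf repo: keys look like 'model.vision....' / 'model.layers.0....'
hf_state = torch.load(os.path.join('cogvlm-hf', 'pytorch_model.bin'), map_location='cpu')

print(list(sat_state)[:3])
print(list(hf_state)[:3])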

Got it. Could you share the conversion code? Thank you very much.

Knowledge Engineering Group (KEG) & Data Mining at Tsinghua University org
edited Dec 15, 2023

It's basically just renaming the weight keys inside the state_dict.

The code is a bit ugly:
def vlm(
        hf_dir: str,
        sat_dir: str,
):
    # Convert a sat-format checkpoint (sat_dir) into a HuggingFace-compatible one:
    # rename the state_dict keys and write pytorch_model.bin plus config.json to hf_dir.
    import os
    import json
    import torch
    from pathlib import Path
    Path(hf_dir).mkdir(exist_ok=True)

    # state dict
    state_dict = torch.load(os.path.expanduser(os.path.join(sat_dir, '1', 'mp_rank_00_model_states.pt')), map_location='cpu')
    state_dict = state_dict['module']
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('mixins.eva.vit_model.mixins.patch_embedding'):
            new_state_dict[k.replace('mixins.eva.vit_model.mixins.', '', 1)] = v
        elif k.startswith('mixins.eva.vit_model.transformer.position_embeddings'):
            new_state_dict[k.replace('mixins.eva.vit_model.transformer.position_embeddings', 'patch_embedding.position_embedding', 1)] = v
        elif k.startswith('mixins.eva.vit_model.transformer.layers'):
            k = k.replace('mlp.dense_4h_to_h', 'mlp.fc2').replace('mlp.dense_h_to_4h', 'mlp.fc1')
            new_state_dict[k.replace('mixins.eva.vit_model.transformer.layers', 'transformer.layers', 1)] = v
        elif k.startswith('mixins.eva.linear_proj'):
            new_state_dict[k.replace('mixins.eva.linear_proj', 'linear_proj', 1)] = v
        elif k in ['mixins.eva.vit_model.transformer.word_embeddings.weight']:
            new_state_dict['patch_embedding.cls_embedding'] = v
        elif k in ['mixins.eva.boi', 'mixins.eva.eoi']:
            new_state_dict[k.replace('mixins.eva.', '', 1)] = v
        else:
            assert not str(k).startswith('mixins.eva'), f"{k}"

    vision_state_dict = {f"model.vision.{k}": v for k, v in new_state_dict.items()}
    new_state_dict = {}
    for k, v in state_dict.items():
        if k == 'mixins.lm.lm_head.weight':
            new_state_dict['lm_head.weight'] = v
        elif k.startswith("mixins.eva"):
            continue
        # mlp
        elif k.startswith('mixins.mlp.vision_dense_h_to_4h_list.') and str(k).endswith('.weight'):
            idx = str(k).replace('mixins.mlp.vision_dense_h_to_4h_list.', '').replace('.weight', '')
            new_state_dict[f"model.layers.{idx}.mlp.vision_mlp.up_proj.weight"] = v
        elif k.startswith('mixins.mlp.vision_dense_4h_to_h_list.') and str(k).endswith('.weight'):
            idx = str(k).replace('mixins.mlp.vision_dense_4h_to_h_list.', '').replace('.weight', '')
            new_state_dict[f"model.layers.{idx}.mlp.vision_mlp.down_proj.weight"] = v
        elif k.startswith('mixins.mlp.vision_gate_proj.') and str(k).endswith('.weight'):
            idx = str(k).replace('mixins.mlp.vision_gate_proj.', '').replace('.weight', '')
            new_state_dict[f"model.layers.{idx}.mlp.vision_mlp.gate_proj.weight"] = v

        elif k.startswith('mixins.mlp.gate_proj.') and str(k).endswith('.weight'):
            idx = str(k).replace('mixins.mlp.gate_proj.', '').replace('.weight', '')
            new_state_dict[f"model.layers.{idx}.mlp.language_mlp.gate_proj.weight"] = v
        elif k.startswith('transformer.layers.') and str(k).endswith('.mlp.dense_h_to_4h.weight'):
            idx = str(k).replace('transformer.layers.', '').replace('.mlp.dense_h_to_4h.weight', '')
            new_state_dict[f"model.layers.{idx}.mlp.language_mlp.up_proj.weight"] = v
        elif k.startswith('transformer.layers.') and str(k).endswith('.mlp.dense_4h_to_h.weight'):
            idx = str(k).replace('transformer.layers.', '').replace('.mlp.dense_4h_to_h.weight', '')
            new_state_dict[f"model.layers.{idx}.mlp.language_mlp.down_proj.weight"] = v
        # attn
        elif k.startswith('transformer.layers.') and str(k).endswith('.attention.query_key_value.weight'):
            idx = str(k).replace('transformer.layers.', '').replace('.attention.query_key_value.weight', '')
            new_state_dict[f"model.layers.{idx}.self_attn.language_expert_query_key_value.weight"] = v
        elif k.startswith('transformer.layers.') and str(k).endswith('.attention.dense.weight'):
            idx = str(k).replace('transformer.layers.', '').replace('.attention.dense.weight', '')
            new_state_dict[f"model.layers.{idx}.self_attn.language_expert_dense.weight"] = v

        elif k.startswith('mixins.rotary.vision_query_key_value_list.') and str(k).endswith('.weight'):
            idx = str(k).replace('mixins.rotary.vision_query_key_value_list.', '').replace('.weight', '')
            new_state_dict[f"model.layers.{idx}.self_attn.vision_expert_query_key_value.weight"] = v
        elif k.startswith('mixins.rotary.vision_dense_list.') and str(k).endswith('.weight'):
            idx = str(k).replace('mixins.rotary.vision_dense_list.', '').replace('.weight', '')
            new_state_dict[f"model.layers.{idx}.self_attn.vision_expert_dense.weight"] = v
        # norm
        elif k.startswith('transformer.layers.') and str(k).endswith('.input_layernorm.weight'):
            idx = str(k).replace('transformer.layers.', '').replace('.input_layernorm.weight', '')
            new_state_dict[f"model.layers.{idx}.input_layernorm.weight"] = v
        elif k.startswith('transformer.layers.') and str(k).endswith('.post_attention_layernorm.weight'):
            idx = str(k).replace('transformer.layers.', '').replace('.post_attention_layernorm.weight', '')
            new_state_dict[f"model.layers.{idx}.post_attention_layernorm.weight"] = v
        #
        elif k == 'transformer.word_embeddings.weight':
            new_state_dict[f"model.embed_tokens.weight"] = v
        elif k == 'transformer.final_layernorm.weight':
            new_state_dict[f"model.norm.weight"] = v
        elif k == 'mixins.rotary.rotary_emb.inv_freq':
            for idx in range(32):  # one inv_freq copy per LM layer (hard-coded to 32 layers here)
                new_state_dict[f"model.layers.{idx}.self_attn.rotary_emb.inv_freq"] = v.clone()
        else:
            assert False, f"{k}"
    new_state_dict.update(vision_state_dict)
    torch.save(new_state_dict, os.path.join(hf_dir, 'pytorch_model.bin'))

    # configs
    config = json.load(open(os.path.expanduser(os.path.join(sat_dir, 'model_config.json'))))
    vision_config = {
        'dropout_prob': 0.0,
        'hidden_act': 'gelu',
        'in_channels': config['eva_args']['in_channels'],
        'num_hidden_layers': config['eva_args']['num_layers'],
        'hidden_size': config['eva_args']['hidden_size'],
        'patch_size': config['eva_args']['patch_size'],
        'num_heads': config['eva_args']['num_attention_heads'],
        'intermediate_size': config['eva_args']['inner_hidden_size'],
        'layer_norm_eps': config['eva_args']['layernorm_epsilon'],
        'num_positions': int(1 + (config['eva_args']['image_size'][0] / config['eva_args']['patch_size']) * (
                    config['eva_args']['image_size'][0] / config['eva_args']['patch_size'])),
        #
        'image_size': config['eva_args']['image_size'][0],
    }

    final_config = {
        'vision_config': vision_config,
        'hidden_size': config['hidden_size'],
        #
        'intermediate_size': config['inner_hidden_size'],
        'num_attention_heads': config['num_attention_heads'],
        'max_position_embeddings': 2048,
        'rms_norm_eps': 1e-5,
        'template_version': 'chat' if 'chat' in sat_dir else 'base',
        'initializer_range': 0.02,
        'pad_token_id': 0,
        "bos_token_id": 1,
        "eos_token_id": 2,
        #
        'vocab_size': config['vocab_size'],
        'num_hidden_layers': config['num_layers'],
        'hidden_act': 'silu',
        'use_cache': True,
    }
    with open(os.path.join(hf_dir, 'config.json'), 'w') as f:
        json.dump(final_config, f, indent=2)
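For completeness, a hypothetical invocation could look like this (the directory names are placeholders for the unpacked zip checkpoint and the output directory):

vlm(hf_dir='cogvlm-hf-converted', sat_dir='cogvlm-sat')

# Quick sanity check on the converted checkpoint.
import torch
sd = torch.load('cogvlm-hf-converted/pytorch_model.bin', map_location='cpu')
print(len(sd), list(sd)[:5])  # HF-style keys such as 'model.layers.0....'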
Thank you very much!

chenkq changed discussion status to closed
