{
  "architectures": [
    "LmdeployForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_lmdeploy.LmdeployConfig",
    "AutoModel": "modeling_lmdeploy.LmdeployForCausalLM",
    "AutoModelForCausalLM": "modeling_lmdeploy.LmdeployForCausalLM"
  },
  "turbomind": {
    "model_name": "internlm-chat-20b",
    "tensor_para_size": 1,
    "head_num": 40,
    "kv_head_num": 40,
    "vocab_size": 103168,
    "num_layer": 60,
    "inter_size": 13824,
    "norm_eps": 1e-06,
    "attn_bias": 0,
    "start_id": 1,
    "end_id": 2,
    "session_len": 8200,
    "weight_type": "int4",
    "rotary_embedding": 128,
    "rope_theta": 10000.0,
    "size_per_head": 128,
    "group_size": 128,
    "max_batch_size": 32,
    "max_context_token_num": 4,
    "step_length": 1,
    "cache_max_entry_count": 48,
    "cache_chunk_size": 1,
    "use_context_fmha": 1,
    "quant_policy": 0,
    "max_position_embeddings": 2048,
    "use_dynamic_ntk": 0,
    "use_logn_attn": 0
  }
}
|