bert4torch_config / chatglm2-6b-32k / bert4torch_config.json
{
    "model": "glm2",
    "hidden_act": "swiglu",
    "hidden_size": 4096,
    "intermediate_size": 13696,
    "layer_norm_eps": 1e-05,
    "max_sequence_length": 32768,
    "num_attention_heads": 32,
    "num_hidden_layers": 28,
    "vocab_size": 65024,
    "segment_vocab_size": 0,
    "num_key_value_heads": 2,
    "skip_init": true,
    "tie_word_embeddings": false,
    "eos_token_id": 2,
    "pad_token_id": 2,
    "rmsnorm": true,
    "rope_rank": "adjacent",
    "rope_scaling": {
        "type": "linear",
        "factor": 16
    },
    "position_encoding_2d": true,
    "_attn_implementation": "sdpa",
    "generation_config": {"max_length": 32768}
}
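
The "rope_scaling" block ("type": "linear", "factor": 16) selects linear positional interpolation: position indices are divided by the factor before the rotary angles are computed, so a 32768-token sequence stays within the positional range the base model was trained on. Below is a minimal sketch of that scaling for a standard RoPE angle computation; the exact head-dim split and 2D position handling in ChatGLM2 may differ, so treat the function name and defaults as illustrative assumptions rather than the library's implementation.

import torch

def rope_angles(seq_len: int, head_dim: int, base: float = 10000.0, factor: float = 16.0):
    """Rotary angles with linear ("positional interpolation") scaling."""
    # Scale positions down by `factor` (16 here), per the config's rope_scaling block.
    positions = torch.arange(seq_len, dtype=torch.float32) / factor
    # Standard RoPE inverse frequencies over even dimensions of the head.
    inv_freq = 1.0 / (base ** (torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim))
    # Angle matrix of shape [seq_len, head_dim // 2], later turned into sin/cos pairs.
    return torch.outer(positions, inv_freq)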
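
To consume this file with bert4torch, the usual entry point is build_transformer_model, pointed at this config and the converted ChatGLM2-6B-32K weights. The paths and weight filename below are placeholders (assumptions about your local layout), so this is a sketch rather than a verified recipe.

from bert4torch.models import build_transformer_model

# Placeholder paths: point these at your local copies of the config and the converted weights.
config_path = 'chatglm2-6b-32k/bert4torch_config.json'
checkpoint_path = 'chatglm2-6b-32k/pytorch_model.bin'

# Build the glm2 model described by the config and load the checkpoint.
model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path)
model.eval()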