{
  "transformer_architecture": {
    "vocab_size": 128000,
    "vocab_file": "vocab.json",
    "hidden_size": 4608,
    "num_layers": 27,
    "num_attention_heads": 36,
    "num_local_attention_heads": 0,
    "local_attention_window_size": null,
    "rotary_embedding_base": 1000000,
    "rotary_percentage": 1.0,
    "sequence_length": 8192,
    "norm_type": "layernorm",
    "relative_position_embedding_type": "rotary_complex",
    "mlp_type": "default",
    "mlp_factor": 4.0,
    "attention_bias": true,
    "attention_qkv_in_one": false,
    "attention_num_kv_heads": 4,
    "attention_use_matmul": false,
    "mlp_bias": true,
    "key_query_norm": false,
    "weight_tying": false,
    "masked_softmax": {
      "kernel": "torch",
      "softmax_in_fp32": true,
      "scale": 1.0,
      "deterministic_flash_attn_bwd": false
    },
    "layernorm": {
      "optimization_type": "torch",
      "layernorm_epsilon": 1e-05
    },
    "precision": "bfloat16",
    "dropout_embedding": 0.0,
    "dropout_attention_probs": 0.0,
    "dropout_after_attention": 0.0,
    "dropout_after_mlp": 0.0,
    "finetunable_token_ids": [],
    "image_encoder": false,
    "dropout_image_encoder": 0.0,
    "lora_config": null
  }
}