{ "attention_qkv_bias": false, "codebook_size": 1024, "dim": 1024, "dropout": 0.1, "head_dim": 64, "initializer_range": 0.02, "intermediate_size": 4096, "max_seq_len": 4096, "model_type": "dual_ar", "n_fast_layer": 4, "n_head": 16, "n_layer": 24, "n_local_heads": 2, "norm_eps": 1e-06, "num_codebooks": 4, "rope_base": 1000000.0, "tie_word_embeddings": false, "use_gradient_checkpointing": true, "vocab_size": 32000 }