{ "dim": 4096, "hidden_dim": 14336, "head_dim": 128, "n_layers": 32, "n_heads": 32, "n_kv_heads": 8, "vocab_size": 32000, "norm_eps": 1e-05, "rope_theta": 1000000, "max_batch_size": 32, "max_seq_len": 4096, "moe": { "num_experts_per_tok": 2, "num_experts": 8 }, "rope_scaling": null }