{ "architectures": [ "OpenLMModel" ], "model_type": "openlm", "params": null, "apply_qk_norm": true, "attn_activation": null, "attn_name": "torch_attn", "attn_seq_scalar": null, "attn_seq_scalar_alpha": null, "dim": 4096, "ffn_type": "swiglu_torch", "model": "open_lm_7b", "norm_type": "gain_only_lp_layer_norm", "moe_capacity_factor": 1.25, "moe_expert_model_parallelism": false, "moe_freq": 0, "moe_loss_weight": 0.1, "moe_num_experts": null, "moe_top_k": 2, "moe_weight_parallelism": false, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-05, "positional_embedding_type": "rotary", "post_embed_norm": false, "qk_norm": true, "seq_len": 8192, "vocab_size": 50432, "weight_tying": false, "torch_dtype": "float32", "transformers_version": "4.38.2" }