{ "_commit_hash": "9ae63354fd42cc1e14334bba246276540c8b9017", "_name_or_path": "togethercomputer/StripedHyena-Nous-7B", "model_type": "stripedhyena", "architectures": [ "StripedHyenaModelForCausalLM" ], "attn_layer_idxs": [ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 ], "auto_map": { "AutoConfig": "togethercomputer/StripedHyena-Nous-7B--configuration_hyena.StripedHyenaConfig", "AutoModelForCausalLM": "togethercomputer/StripedHyena-Nous-7B--modeling_hyena.StripedHyenaModelForCausalLM" }, "column_split": false, "column_split_hyena": true, "eps": 1e-05, "final_norm": true, "hidden_size": 4096, "hyena_filter_groups": 1, "hyena_layer_idxs": [ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ], "inference_mode": false, "inner_mlp_size": 14336, "log_intermediate_values": false, "make_vocab_size_divisible_by": 8, "max_seqlen": 32768, "mha_out_proj_bias": false, "model_parallel_size": 1, "num_attention_heads": 32, "num_filters": 4096, "num_layers": 32, "pipe_parallel_size": 1, "prefill_style": "fft", "proj_groups": 4, "qkv_proj_bias": false, "rotary_emb_base": 500000, "short_filter_bias": true, "short_filter_length": 3, "smeared_gqa": false, "split_k0": true, "state_size": 2, "tie_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": null, "use_cache": true, "use_flash_attn": true, "use_flash_depthwise": false, "use_flash_rmsnorm": true, "use_flashfft": false, "vocab_size": 32000 }