{
  "attn_cfg": null,
  "attn_layer_idx": null,
  "d_inner": 512,
  "d_model": 512,
  "device": null,
  "dtype": null,
  "embed_dropout": 0.1,
  "fused_dropout_add_ln": true,
  "fused_mlp": false,
  "initializer_cfg": null,
  "layer": {
    "_name_": "hyena",
    "d_model": 512,
    "emb_dim": 3,
    "filter_order": 64,
    "l_max": 1026,
    "local_order": 3,
    "lr": 0.0002,
    "lr_pos_emb": 0.0,
    "modulate": true,
    "w": 10,
    "wd": 0.0
  },
  "layer_norm_epsilon": 1e-05,
  "max_position_embeddings": 0,
  "n_classes": 2,
  "n_layer": 8,
  "pad_vocab_size_multiple": 8,
  "resid_dropout": 0.0,
  "residual_in_fp32": true,
  "return_hidden_state": true,
  "use_head": true,
  "vocab_size": 1000
}