{ "d_model": 768, "debug": true, "layer_norm_eps": 1e-05, "d_vocab": 50257, "init_range": 0.02, "n_ctx": 128, "d_head": 64, "dt_head": 64, "d_mlp": 2048, "causal_attn": true, "attn_type": "trittention", "n_heads": 12, "nt_heads": 2, "n_layers": 1, "dropout": 0.1, "mlp_type": "all", "with_ln": true, "is_gated": false, "has_mlp": true, "order_attn": true, "attn_eq": false, "window_size": 16, "look_backward": 1, "pad_value": 0, "autopad": true, "freqs_cis": null }