{
  "activation": "gelu",
  "bias": false,
  "d_model": 2048,
  "dff": null,
  "dropout_rate": 0.0,
  "max_block_size": 1024,
  "n_heads_ra": 16,
  "n_heads_sa": 16,
  "n_layers": 24,
  "norm_first": true,
  "pos_enc_type": "RoPE",
  "ra_kwargs": {
    "n_kv_heads": 8,
    "n_relations": 128,
    "rel_activation": "identity",
    "rel_proj_dim": 8,
    "symmetric_rels": false
  },
  "ra_type": "relational_attention",
  "sa_kwargs": {
    "n_kv_heads": 8
  },
  "share_attn_params": false,
  "symbol_retrieval": "symbolic_attention",
  "symbol_retrieval_kwargs": {
    "d_model": 2048,
    "n_heads": 16,
    "n_symbols": 2048,
    "trainable_symbols": false
  },
  "symbol_retriever_config": {
    "shared_symbol_retriever": true,
    "weight_tie_symbol_library": false
  },
  "vocab_size": 50304
}