Push model using huggingface_hub.

Files changed (3) hide show

README.md ADDED Viewed

+---
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Library: [More Information Needed]
+- Docs: [More Information Needed]

config.json ADDED Viewed

+{
+  "activation": "gelu",
+  "bias": false,
+  "d_model": 2048,
+  "dff": null,
+  "dropout_rate": 0.0,
+  "max_block_size": 1024,
+  "n_heads_ra": 16,
+  "n_heads_sa": 16,
+  "n_layers": 24,
+  "norm_first": true,
+  "pos_enc_type": "RoPE",
+  "ra_kwargs": {
+    "n_kv_heads": 8,
+    "n_relations": 64,
+    "rel_activation": "identity",
+    "rel_proj_dim": 16,
+    "symmetric_rels": false
+  },
+  "ra_type": "relational_attention",
+  "sa_kwargs": {
+    "n_kv_heads": 8
+  },
+  "share_attn_params": false,
+  "symbol_retrieval": "symbolic_attention",
+  "symbol_retrieval_kwargs": {
+    "d_model": 2048,
+    "n_heads": 8,
+    "n_symbols": 2048,
+    "trainable_symbols": false
+  },
+  "symbol_retriever_config": {
+    "shared_symbol_retriever": true,
+    "weight_tie_symbol_library": false
+  },
+  "vocab_size": 50304
+}

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:df923a992ad5c7787e9c138d94e3c8761f72456ebf621deae5028551c0494083
+size 5101083552