awni00
/

DAT-sa8-ra8-ns1024-sh8-nkvh4-343M

Text Generation

model_hub_mixin

pytorch_model_hub_mixin

Inference Endpoints

Model card · Files and versions · Community

awni00 committed on Jul 30, 2024

Commit

0263d1a

·

verified ·

1 Parent(s): 8f4d880

Push model using huggingface_hub.

Files changed (2) hide show

config.json +1 -8
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -1,8 +1,5 @@
 {
   "activation": "gelu",
-  "architectures": [
-    "PretrainedDualAttnTransformerLM"
-  ],
   "bias": false,
   "d_model": 1024,
   "dff": null,
@@ -12,7 +9,6 @@
   "n_heads_sa": 8,
   "n_layers": 24,
   "norm_first": true,
-  "norm_type": "layernorm",
   "pos_enc_type": "RoPE",
   "ra_kwargs": {
     "n_kv_heads": 4,
@@ -25,7 +21,6 @@
   "sa_kwargs": {
     "n_kv_heads": 4
   },
-  "share_attn_params": false,
   "symbol_retrieval": "symbolic_attention",
   "symbol_retrieval_kwargs": {
     "d_model": 1024,
@@ -37,7 +32,5 @@
     "shared_symbol_retriever": true,
     "weight_tie_symbol_library": false
   },
-  "torch_dtype": "float32",
-  "transformers_version": "4.39.3",
   "vocab_size": 50304
-}

 {
   "activation": "gelu",
   "bias": false,
   "d_model": 1024,
   "dff": null,
   "n_heads_sa": 8,
   "n_layers": 24,
   "norm_first": true,
   "pos_enc_type": "RoPE",
   "ra_kwargs": {
     "n_kv_heads": 4,
   "sa_kwargs": {
     "n_kv_heads": 4
   },
   "symbol_retrieval": "symbolic_attention",
   "symbol_retrieval_kwargs": {
     "d_model": 1024,
     "shared_symbol_retriever": true,
     "weight_tie_symbol_library": false
   },
   "vocab_size": 50304
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbf26bd2dc50af838ba713724a8d49962a1e99ba170a62c64da215aeef14b04a
-size 1377604392

 version https://git-lfs.github.com/spec/v1
+oid sha256:44e33f55dad515c97020e28a1975d29dc6e01628a3a35ca36026c4a831cfecaf
+size 1377609760