pvduy committed
Commit 4b6113a
Parent: 8149230

Upload GPTNeoXForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "pvduy/pythia-6B-sft-summarize-tldr",
+  "_name_or_path": "pvduy/pythia-6B-ppo-summarize-tldr",
   "architectures": [
     "GPTNeoXForCausalLM"
   ],
@@ -18,7 +18,7 @@
   "rotary_pct": 0.25,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.26.1",
+  "transformers_version": "4.28.0.dev0",
   "use_cache": true,
   "use_parallel_residual": true,
   "vocab_size": 50432
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 0,
   "eos_token_id": 0,
-  "transformers_version": "4.26.1"
+  "transformers_version": "4.28.0.dev0"
 }
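
Here only the serializer version changes; the generation defaults themselves (bos/eos token id 0, derived from the model config) are untouched. A sketch of reading them back, assuming the same repo id as above:

    from transformers import GenerationConfig

    # Both special-token ids are 0 for this GPT-NeoX checkpoint.
    gen_config = GenerationConfig.from_pretrained("pvduy/pythia-6B-ppo-summarize-tldr")
    print(gen_config.bos_token_id, gen_config.eos_token_id)  # -> 0 0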
pytorch_model-00001-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:425f42c2307f6b298f2e45ea806894be131863436d79a2cdfaf4f18e390cf04f
-size 9938825092
+oid sha256:eae066be6eba1565fa6c380a3f5560821b3c0387fdc1c6d6844df9ebedfc3e8d
+size 10005951042
pytorch_model-00002-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00b3dc4d87f5a894ff46d53b16993cb7dc6c2bb186d5442f2774b13a8243180a
-size 9783752917
+oid sha256:5e5fee07c3f2dc231a2dfb794c41b6faa3f50c4ba119117c77cf8ccb1f159ceb
+size 9985128733
pytorch_model-00003-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ac5203743bc0440d75861d2da9be444b5f2804238802f1683aee64651310e52
-size 7841017136
+oid sha256:98621b9e55db137f6b11a79bf056d7ee2f0fd824e248e0f428bf354475dc391a
+size 7572515298
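
Each .bin entry above is a Git LFS pointer, not the weights themselves: the oid line is the SHA-256 of the actual shard, so a completed download can be verified against it. A minimal check, assuming the shard sits in the current directory:

    import hashlib

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so multi-GB shards don't need to fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    # Expected digest copied from the new LFS pointer for shard 1 above.
    expected = "eae066be6eba1565fa6c380a3f5560821b3c0387fdc1c6d6844df9ebedfc3e8d"
    assert sha256_of("pytorch_model-00001-of-00003.bin") == expected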
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 27563427968
+    "total_size": 27445987456.0
   },
   "weight_map": {
     "embed_out.weight": "pytorch_model-00003-of-00003.bin",
@@ -53,8 +53,8 @@
     "gpt_neox.layers.10.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.bias": "pytorch_model-00001-of-00003.bin",
-    "gpt_neox.layers.11.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
-    "gpt_neox.layers.11.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+    "gpt_neox.layers.11.attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+    "gpt_neox.layers.11.attention.dense.weight": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.masked_bias": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
@@ -258,8 +258,8 @@
     "gpt_neox.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.23.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
     "gpt_neox.layers.23.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
-    "gpt_neox.layers.23.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
-    "gpt_neox.layers.23.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+    "gpt_neox.layers.23.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+    "gpt_neox.layers.23.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.23.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.24.attention.bias": "pytorch_model-00003-of-00003.bin",