pvduy committed
Commit 4b6113a
Parent: 8149230

Upload GPTNeoXForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "pvduy/pythia-6B-sft-summarize-tldr",
+  "_name_or_path": "pvduy/pythia-6B-ppo-summarize-tldr",
   "architectures": [
     "GPTNeoXForCausalLM"
   ],
@@ -18,7 +18,7 @@
   "rotary_pct": 0.25,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.26.1",
+  "transformers_version": "4.28.0.dev0",
   "use_cache": true,
   "use_parallel_residual": true,
   "vocab_size": 50432
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 0,
   "eos_token_id": 0,
-  "transformers_version": "4.26.1"
+  "transformers_version": "4.28.0.dev0"
 }
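
Here only the serializer version changes; the generation defaults themselves (bos/eos token id 0, derived from the model config) are untouched. A sketch of reading them back, assuming the same repo id as above:

    from transformers import GenerationConfig

    # Both special-token ids are 0 for this GPT-NeoX checkpoint.
    gen_config = GenerationConfig.from_pretrained("pvduy/pythia-6B-ppo-summarize-tldr")
    print(gen_config.bos_token_id, gen_config.eos_token_id)  # -> 0 0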
pytorch_model-00001-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:425f42c2307f6b298f2e45ea806894be131863436d79a2cdfaf4f18e390cf04f
-size 9938825092
+oid sha256:eae066be6eba1565fa6c380a3f5560821b3c0387fdc1c6d6844df9ebedfc3e8d
+size 10005951042
pytorch_model-00002-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00b3dc4d87f5a894ff46d53b16993cb7dc6c2bb186d5442f2774b13a8243180a
-size 9783752917
+oid sha256:5e5fee07c3f2dc231a2dfb794c41b6faa3f50c4ba119117c77cf8ccb1f159ceb
+size 9985128733
pytorch_model-00003-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ac5203743bc0440d75861d2da9be444b5f2804238802f1683aee64651310e52
-size 7841017136
+oid sha256:98621b9e55db137f6b11a79bf056d7ee2f0fd824e248e0f428bf354475dc391a
+size 7572515298
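
Each .bin entry above is a Git LFS pointer, not the weights themselves: the oid line is the SHA-256 of the actual shard, so a completed download can be verified against it. A minimal check, assuming the shard sits in the current directory:

    import hashlib

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so multi-GB shards don't need to fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    # Expected digest copied from the new LFS pointer for shard 1 above.
    expected = "eae066be6eba1565fa6c380a3f5560821b3c0387fdc1c6d6844df9ebedfc3e8d"
    assert sha256_of("pytorch_model-00001-of-00003.bin") == expected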
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 27563427968
+    "total_size": 27445987456.0
   },
   "weight_map": {
     "embed_out.weight": "pytorch_model-00003-of-00003.bin",
@@ -53,8 +53,8 @@
     "gpt_neox.layers.10.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.bias": "pytorch_model-00001-of-00003.bin",
-    "gpt_neox.layers.11.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
-    "gpt_neox.layers.11.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+    "gpt_neox.layers.11.attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+    "gpt_neox.layers.11.attention.dense.weight": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.masked_bias": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
     "gpt_neox.layers.11.attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
@@ -258,8 +258,8 @@
     "gpt_neox.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.23.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
     "gpt_neox.layers.23.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
-    "gpt_neox.layers.23.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
-    "gpt_neox.layers.23.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+    "gpt_neox.layers.23.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+    "gpt_neox.layers.23.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.23.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
     "gpt_neox.layers.24.attention.bias": "pytorch_model-00003-of-00003.bin",