hails committed on
Commit
66daa77
1 Parent(s): bce962c

Upload pytorch_model.bin.index.json

Browse files
Files changed (1) hide show
  1. pytorch_model.bin.index.json +12 -12
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 23843140808
4
  },
5
  "weight_map": {
6
  "embed_out.weight": "pytorch_model-00003-of-00003.bin",
@@ -106,27 +106,27 @@
106
  "gpt_neox.layers.14.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
107
  "gpt_neox.layers.14.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
108
  "gpt_neox.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
109
- "gpt_neox.layers.14.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
110
- "gpt_neox.layers.14.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
111
  "gpt_neox.layers.14.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
112
  "gpt_neox.layers.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
113
  "gpt_neox.layers.14.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
114
  "gpt_neox.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
115
- "gpt_neox.layers.15.attention.bias": "pytorch_model-00002-of-00003.bin",
116
  "gpt_neox.layers.15.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
117
  "gpt_neox.layers.15.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
118
- "gpt_neox.layers.15.attention.masked_bias": "pytorch_model-00002-of-00003.bin",
119
  "gpt_neox.layers.15.attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
120
  "gpt_neox.layers.15.attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
121
- "gpt_neox.layers.15.attention.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
122
- "gpt_neox.layers.15.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
123
- "gpt_neox.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
124
  "gpt_neox.layers.15.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
125
  "gpt_neox.layers.15.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
126
  "gpt_neox.layers.15.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
127
  "gpt_neox.layers.15.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
128
- "gpt_neox.layers.15.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
129
- "gpt_neox.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
130
  "gpt_neox.layers.16.attention.bias": "pytorch_model-00002-of-00003.bin",
131
  "gpt_neox.layers.16.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
132
  "gpt_neox.layers.16.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
@@ -378,8 +378,8 @@
378
  "gpt_neox.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
379
  "gpt_neox.layers.30.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
380
  "gpt_neox.layers.30.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
381
- "gpt_neox.layers.30.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
382
- "gpt_neox.layers.30.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
383
  "gpt_neox.layers.30.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
384
  "gpt_neox.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
385
  "gpt_neox.layers.31.attention.bias": "pytorch_model-00003-of-00003.bin",
 
1
  {
2
  "metadata": {
3
+ "total_size": 23711020232.0
4
  },
5
  "weight_map": {
6
  "embed_out.weight": "pytorch_model-00003-of-00003.bin",
 
106
  "gpt_neox.layers.14.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
107
  "gpt_neox.layers.14.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
108
  "gpt_neox.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
109
+ "gpt_neox.layers.14.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
110
+ "gpt_neox.layers.14.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
111
  "gpt_neox.layers.14.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
112
  "gpt_neox.layers.14.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
113
  "gpt_neox.layers.14.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
114
  "gpt_neox.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
115
+ "gpt_neox.layers.15.attention.bias": "pytorch_model-00001-of-00003.bin",
116
  "gpt_neox.layers.15.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
117
  "gpt_neox.layers.15.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
118
+ "gpt_neox.layers.15.attention.masked_bias": "pytorch_model-00001-of-00003.bin",
119
  "gpt_neox.layers.15.attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
120
  "gpt_neox.layers.15.attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
121
+ "gpt_neox.layers.15.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
122
+ "gpt_neox.layers.15.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
123
+ "gpt_neox.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
124
  "gpt_neox.layers.15.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
125
  "gpt_neox.layers.15.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
126
  "gpt_neox.layers.15.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
127
  "gpt_neox.layers.15.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
128
+ "gpt_neox.layers.15.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
129
+ "gpt_neox.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
130
  "gpt_neox.layers.16.attention.bias": "pytorch_model-00002-of-00003.bin",
131
  "gpt_neox.layers.16.attention.dense.bias": "pytorch_model-00002-of-00003.bin",
132
  "gpt_neox.layers.16.attention.dense.weight": "pytorch_model-00002-of-00003.bin",
 
378
  "gpt_neox.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
379
  "gpt_neox.layers.30.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
380
  "gpt_neox.layers.30.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
381
+ "gpt_neox.layers.30.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
382
+ "gpt_neox.layers.30.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
383
  "gpt_neox.layers.30.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
384
  "gpt_neox.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
385
  "gpt_neox.layers.31.attention.bias": "pytorch_model-00003-of-00003.bin",