nuxlear committed on
Commit
82c4253
1 Parent(s): 52e7242

Upload GPTJForCausalLM

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "out/crawl_kogpt_ryan16_b32_lr2e-6_fp16",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTJForCausalLM"
 
1
  {
2
+ "_name_or_path": "out/crawl2_kogpt_ryan16_warmup_fix_continue/checkpoint-400",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTJForCausalLM"
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cadef4f51acc41163e1b33bf4580d1196eac77d7e2c8e86b33d432a5a05117b
3
- size 9992262166
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1af0ea111c8a065ba0027f8a56acdbf5ac8f93d1dce00f973800d5dcbb220190
3
+ size 10025818002
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:170ea932a48485a7e83e76fbf3cfe821febd446818525dd1656b44761743daf2
3
- size 2575737991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0012117bb5550f358d8046947c31e12a201b49508e145336f62d7c2603b5c1c
3
+ size 2424741759
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 12567885880
4
  },
5
  "weight_map": {
6
  "lm_head.bias": "pytorch_model-00002-of-00002.bin",
@@ -198,11 +198,11 @@
198
  "transformer.h.22.mlp.fc_out.bias": "pytorch_model-00001-of-00002.bin",
199
  "transformer.h.22.mlp.fc_out.weight": "pytorch_model-00001-of-00002.bin",
200
  "transformer.h.23.attn.bias": "pytorch_model-00001-of-00002.bin",
201
- "transformer.h.23.attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
202
  "transformer.h.23.attn.masked_bias": "pytorch_model-00001-of-00002.bin",
203
- "transformer.h.23.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
204
- "transformer.h.23.attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
205
- "transformer.h.23.attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
206
  "transformer.h.23.ln_1.bias": "pytorch_model-00001-of-00002.bin",
207
  "transformer.h.23.ln_1.weight": "pytorch_model-00001-of-00002.bin",
208
  "transformer.h.23.mlp.fc_in.bias": "pytorch_model-00002-of-00002.bin",
 
1
  {
2
  "metadata": {
3
+ "total_size": 12347684920.0
4
  },
5
  "weight_map": {
6
  "lm_head.bias": "pytorch_model-00002-of-00002.bin",
 
198
  "transformer.h.22.mlp.fc_out.bias": "pytorch_model-00001-of-00002.bin",
199
  "transformer.h.22.mlp.fc_out.weight": "pytorch_model-00001-of-00002.bin",
200
  "transformer.h.23.attn.bias": "pytorch_model-00001-of-00002.bin",
201
+ "transformer.h.23.attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
202
  "transformer.h.23.attn.masked_bias": "pytorch_model-00001-of-00002.bin",
203
+ "transformer.h.23.attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
204
+ "transformer.h.23.attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
205
+ "transformer.h.23.attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
206
  "transformer.h.23.ln_1.bias": "pytorch_model-00001-of-00002.bin",
207
  "transformer.h.23.ln_1.weight": "pytorch_model-00001-of-00002.bin",
208
  "transformer.h.23.mlp.fc_in.bias": "pytorch_model-00002-of-00002.bin",