Upload GPTJForCausalLM

Files changed (4) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "out/crawl_kogpt_ryan16_b32_lr2e-6_fp16",
   "activation_function": "gelu_new",
   "architectures": [
     "GPTJForCausalLM"

 {
+  "_name_or_path": "out/crawl2_kogpt_ryan16_warmup_fix_continue/checkpoint-400",
   "activation_function": "gelu_new",
   "architectures": [
     "GPTJForCausalLM"

pytorch_model-00001-of-00002.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cadef4f51acc41163e1b33bf4580d1196eac77d7e2c8e86b33d432a5a05117b
-size 9992262166

 version https://git-lfs.github.com/spec/v1
+oid sha256:1af0ea111c8a065ba0027f8a56acdbf5ac8f93d1dce00f973800d5dcbb220190
+size 10025818002

pytorch_model-00002-of-00002.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:170ea932a48485a7e83e76fbf3cfe821febd446818525dd1656b44761743daf2
-size 2575737991

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0012117bb5550f358d8046947c31e12a201b49508e145336f62d7c2603b5c1c
+size 2424741759

pytorch_model.bin.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 12567885880
   },
   "weight_map": {
     "lm_head.bias": "pytorch_model-00002-of-00002.bin",
@@ -198,11 +198,11 @@
     "transformer.h.22.mlp.fc_out.bias": "pytorch_model-00001-of-00002.bin",
     "transformer.h.22.mlp.fc_out.weight": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.attn.bias": "pytorch_model-00001-of-00002.bin",
-    "transformer.h.23.attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
     "transformer.h.23.attn.masked_bias": "pytorch_model-00001-of-00002.bin",
-    "transformer.h.23.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "transformer.h.23.attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
-    "transformer.h.23.attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
     "transformer.h.23.ln_1.bias": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.ln_1.weight": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.mlp.fc_in.bias": "pytorch_model-00002-of-00002.bin",

 {
   "metadata": {
+    "total_size": 12347684920.0
   },
   "weight_map": {
     "lm_head.bias": "pytorch_model-00002-of-00002.bin",
     "transformer.h.22.mlp.fc_out.bias": "pytorch_model-00001-of-00002.bin",
     "transformer.h.22.mlp.fc_out.weight": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.attn.bias": "pytorch_model-00001-of-00002.bin",
+    "transformer.h.23.attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.attn.masked_bias": "pytorch_model-00001-of-00002.bin",
+    "transformer.h.23.attn.out_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "transformer.h.23.attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "transformer.h.23.attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.ln_1.bias": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.ln_1.weight": "pytorch_model-00001-of-00002.bin",
     "transformer.h.23.mlp.fc_in.bias": "pytorch_model-00002-of-00002.bin",