Lyaaaaaaaaaaaaaaa committed on May 6, 2023

Commit

048fffb

•

1 Parent(s): 4a1bc07

Upload 42 files

Browse files

Files changed (42) hide show

config.json +82 -0
generation_config.json +7 -0
merges.txt +0 -0
pytorch_model-00001-of-00034.bin +3 -0
pytorch_model-00002-of-00034.bin +3 -0
pytorch_model-00003-of-00034.bin +3 -0
pytorch_model-00004-of-00034.bin +3 -0
pytorch_model-00005-of-00034.bin +3 -0
pytorch_model-00006-of-00034.bin +3 -0
pytorch_model-00007-of-00034.bin +3 -0
pytorch_model-00008-of-00034.bin +3 -0
pytorch_model-00009-of-00034.bin +3 -0
pytorch_model-00010-of-00034.bin +3 -0
pytorch_model-00011-of-00034.bin +3 -0
pytorch_model-00012-of-00034.bin +3 -0
pytorch_model-00013-of-00034.bin +3 -0
pytorch_model-00014-of-00034.bin +3 -0
pytorch_model-00015-of-00034.bin +3 -0
pytorch_model-00016-of-00034.bin +3 -0
pytorch_model-00017-of-00034.bin +3 -0
pytorch_model-00018-of-00034.bin +3 -0
pytorch_model-00019-of-00034.bin +3 -0
pytorch_model-00020-of-00034.bin +3 -0
pytorch_model-00021-of-00034.bin +3 -0
pytorch_model-00022-of-00034.bin +3 -0
pytorch_model-00023-of-00034.bin +3 -0
pytorch_model-00024-of-00034.bin +3 -0
pytorch_model-00025-of-00034.bin +3 -0
pytorch_model-00026-of-00034.bin +3 -0
pytorch_model-00027-of-00034.bin +3 -0
pytorch_model-00028-of-00034.bin +3 -0
pytorch_model-00029-of-00034.bin +3 -0
pytorch_model-00030-of-00034.bin +3 -0
pytorch_model-00031-of-00034.bin +3 -0
pytorch_model-00032-of-00034.bin +3 -0
pytorch_model-00033-of-00034.bin +3 -0
pytorch_model-00034-of-00034.bin +3 -0
pytorch_model.bin.index.json +491 -0
special_tokens_map.json +5 -0
tokenizer.json +0 -0
tokenizer_config.json +33 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_name_or_path": "KoboldAI/GPT-Neo-2.7B-Horni",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPTNeoForCausalLM"
+  ],
+  "attention_dropout": 0,
+  "attention_layers": [
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local"
+  ],
+  "attention_types": [
+    [
+      [
+        "global",
+        "local"
+      ],
+      16
+    ]
+  ],
+  "bos_token_id": 50256,
+  "embed_dropout": 0,
+  "eos_token_id": 50256,
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": null,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "gpt_neo",
+  "num_heads": 20,
+  "num_layers": 32,
+  "rep_pen": 2.0,
+  "resid_dropout": 0,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50,
+      "temperature": 0.9
+    }
+  },
+  "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float16",
+  "transformers_version": "4.27.4",
+  "use_cache": false,
+  "vocab_size": 50257,
+  "window_size": 256
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.27.4",
+  "use_cache": false
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model-00001-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f713a462ff20e675c62e450a60f51aed6f65430a165f82b4f709b539f4342361
+size 537

pytorch_model-00002-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff2c4811d3084e3f8899490d040f6390de3b6b300f537807026fccb961c94b55
+size 257316778

pytorch_model-00003-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77ad16bda77e30987425ea6bbc565859580c0074df1d435f335bec81c1d610e3
+size 202443995

pytorch_model-00004-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7693eeacab18f7fddc9790523040d019ad51ca3e78aa1ed2592afebb601e903a
+size 187768439

pytorch_model-00005-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d2a78bbdfa93cfe8f32c9d9b60e2d87f099ee4b14ff9c00b25b4e6023afa5e7
+size 161537181

pytorch_model-00006-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea3fe91bd58a7857889eed63d19ae4c1c6469d52c0f2a0836865f3f094e5f78f
+size 161537181

pytorch_model-00007-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4cdf137cf12258d8914694f42a220fdae0744d6e7e33a71e2934dcd9642cb9d
+size 161537181

pytorch_model-00008-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4c24616eca27229c5ff6f571248bba279db99fc5f130c7d1f3b9b1929aeeb1
+size 161537181

pytorch_model-00009-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a6c8b03e80f7b904ec35c39092f3775d753750de3fd6144e586dae1869c2dc8
+size 161537181

pytorch_model-00010-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58e8795ab24bbe49c64c196a9b44714fd734d925fc35f45c70a44caaf00e40f4
+size 161537181

pytorch_model-00011-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd3911936872ab0c4fa5832ac3f4f1229d8ac5aacf960521357deccb2855c8bd
+size 161537181

pytorch_model-00012-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:079901b8bf4738cc049abc9abb64f1617386ebd4968584b2daf53af8885b6883
+size 161537181

pytorch_model-00013-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9765e40c4365d88b0c2b9a3ce8dd01ebe6bc6fd414623cde1175660035004883
+size 161537181

pytorch_model-00014-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba1235ab0e45428d8da691a4e206c35940bc3ec16d2e09b6b77bf109d6336cd6
+size 161537181

pytorch_model-00015-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55cb363102c04a5db91508782798cab1f0504bbd006293e466d60fd2b887adcc
+size 161537181

pytorch_model-00016-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b46beae3fede30053bec5405c5b7581aebeb256b321739bb6d61a760eecbf7ea
+size 161537181

pytorch_model-00017-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1b2d513be157415e05b9211c00348d491215ac009bfdb5df072d5c17a37dd86
+size 161537181

pytorch_model-00018-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0cc4e2c67b69cf0248c5840328dcf0da728df3299c48fe4774fce89d7d8402
+size 161537181

pytorch_model-00019-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06afc4d91903fc611c468461f9c35af2da7815e967313e8e70590970ae4f204c
+size 161537181

pytorch_model-00020-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6890fdabe9ea3d62963bcf77a50cfe75faadfb05be25098bd4ac500f9c60c87
+size 161537181

pytorch_model-00021-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e15418ddcff312163cbf0fc02644ca716a55de24a9161f124f05215de7ee446
+size 161537181

pytorch_model-00022-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d959119c3b14b8488c87ca7a1e0611b4ea8f9a56ba0308985582652440863ccd
+size 161537181

pytorch_model-00023-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6f04996567765611dd844fc0e82f07b2d06cca6f566531a006326dbd255389d
+size 161537181

pytorch_model-00024-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4ec3f4b5418876ad642f9e8f67b50256886b77a62b1a9360d2d22f45a7c6983
+size 161537181

pytorch_model-00025-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97470d9dc4c7ef6c63b80e1a57f96fb783a6f6e9d6be26f340269ed572166efb
+size 161537181

pytorch_model-00026-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ac2c53428c5bd26af9f9dc333e2ac9c53d114c1349d6cfa15525698b9c21672
+size 161537181

pytorch_model-00027-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67785749f1dda5c213de5a78ccd6e8e6cc5cffa28c3e64a1f28baba6f866bc3a
+size 161537181

pytorch_model-00028-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8967a6cc210a9fcac268021996454a53c09de4b8186db85089ca312740d6038b
+size 161537181

pytorch_model-00029-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:903907068603b879c1a13be04b29be4285bafd3f5e749a112689cf5d2cf7a5c6
+size 161537181

pytorch_model-00030-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01852da94533457319203c205a75e4835f1cdb9627bd31175f3497e19bba5711
+size 161537181

pytorch_model-00031-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f0166ff19d9ae7e469021d1aa5b9d4cdd1ba4266c2f18cf2f90b6ba8bcba0d1
+size 161537181

pytorch_model-00032-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b78f6fa5b004abbd90eae1888baed8042ff4212c5ba57f1ba1c72f61620b81d
+size 161537181

pytorch_model-00033-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:662818f32484cd5b7330330b301af7891f811d46a4a7df893a5084b36be83b0c
+size 161537181

pytorch_model-00034-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e52d5ec04e9d8d07185a75409cf8d909ff20df1099ba28f5223cfb035532f40e
+size 104895871

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,491 @@

+{
+  "metadata": {
+    "total_size": 5319392320.0
+  },
+  "weight_map": {
+    "transformer.h.0.attn.attention.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.k_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.masked_bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.out_proj.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.out_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.q_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.v_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_1.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_1.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_2.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_2.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_fc.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_fc.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_proj.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.k_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.masked_bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.out_proj.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.attn.attention.out_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.attn.attention.q_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.attn.attention.v_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.ln_1.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.ln_1.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.ln_2.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.ln_2.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_fc.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_fc.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_proj.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.10.attn.attention.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.k_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.masked_bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.out_proj.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.out_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.q_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.v_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_1.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_1.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_2.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_2.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.mlp.c_fc.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.10.mlp.c_fc.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.10.mlp.c_proj.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.10.mlp.c_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.k_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.masked_bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.out_proj.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.out_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.q_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.v_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_1.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_1.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_2.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_2.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.mlp.c_fc.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.11.mlp.c_fc.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.11.mlp.c_proj.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.11.mlp.c_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.k_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.masked_bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.out_proj.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.out_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.q_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.v_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_1.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_1.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_2.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_2.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.mlp.c_fc.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.12.mlp.c_fc.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.12.mlp.c_proj.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.12.mlp.c_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.k_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.masked_bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.out_proj.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.out_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.q_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.v_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_1.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_1.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_2.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_2.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.mlp.c_fc.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.13.mlp.c_fc.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.13.mlp.c_proj.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.13.mlp.c_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.k_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.masked_bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.out_proj.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.out_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.q_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.v_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_1.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_1.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_2.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_2.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.mlp.c_fc.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.14.mlp.c_fc.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.14.mlp.c_proj.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.14.mlp.c_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.k_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.masked_bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.out_proj.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.out_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.q_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.v_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_1.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_1.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_2.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_2.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.mlp.c_fc.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.15.mlp.c_fc.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.15.mlp.c_proj.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.15.mlp.c_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.k_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.masked_bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.out_proj.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.out_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.q_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.v_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_1.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_1.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_2.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_2.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.mlp.c_fc.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.16.mlp.c_fc.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.16.mlp.c_proj.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.16.mlp.c_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.k_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.masked_bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.out_proj.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.out_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.q_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.v_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_1.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_1.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_2.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_2.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.mlp.c_fc.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.17.mlp.c_fc.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.17.mlp.c_proj.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.17.mlp.c_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.k_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.masked_bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.out_proj.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.out_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.q_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.v_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_1.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_1.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_2.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_2.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.mlp.c_fc.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.18.mlp.c_fc.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.18.mlp.c_proj.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.18.mlp.c_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.k_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.masked_bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.out_proj.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.out_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.q_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.v_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_1.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_1.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_2.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_2.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.mlp.c_fc.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.19.mlp.c_fc.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.19.mlp.c_proj.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.19.mlp.c_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.2.attn.attention.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.k_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.masked_bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.out_proj.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.out_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.q_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.v_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_1.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_1.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_2.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_2.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.mlp.c_fc.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.2.mlp.c_fc.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.2.mlp.c_proj.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.2.mlp.c_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.20.attn.attention.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.k_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.masked_bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.out_proj.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.out_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.q_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.v_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_1.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_1.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_2.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_2.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.mlp.c_fc.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.20.mlp.c_fc.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.20.mlp.c_proj.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.20.mlp.c_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.k_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.masked_bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.out_proj.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.out_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.q_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.v_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_1.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_1.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_2.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_2.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.mlp.c_fc.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.21.mlp.c_fc.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.21.mlp.c_proj.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.21.mlp.c_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.k_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.masked_bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.out_proj.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.out_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.q_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.v_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_1.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_1.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_2.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_2.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.mlp.c_fc.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.22.mlp.c_fc.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.22.mlp.c_proj.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.22.mlp.c_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.k_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.masked_bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.out_proj.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.out_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.q_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.v_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_1.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_1.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_2.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_2.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.mlp.c_fc.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.23.mlp.c_fc.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.23.mlp.c_proj.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.23.mlp.c_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.k_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.masked_bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.out_proj.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.out_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.q_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.v_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_1.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_1.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_2.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_2.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.mlp.c_fc.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.24.mlp.c_fc.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.24.mlp.c_proj.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.24.mlp.c_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.k_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.masked_bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.out_proj.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.out_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.q_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.v_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_1.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_1.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_2.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_2.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.mlp.c_fc.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.25.mlp.c_fc.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.25.mlp.c_proj.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.25.mlp.c_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.k_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.masked_bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.out_proj.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.out_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.q_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.v_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_1.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_1.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_2.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_2.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.mlp.c_fc.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.26.mlp.c_fc.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.26.mlp.c_proj.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.26.mlp.c_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.k_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.masked_bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.out_proj.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.out_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.q_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.v_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_1.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_1.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_2.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_2.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.mlp.c_fc.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.27.mlp.c_fc.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.27.mlp.c_proj.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.27.mlp.c_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.k_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.masked_bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.out_proj.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.out_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.q_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.v_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_1.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_1.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_2.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_2.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.mlp.c_fc.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.28.mlp.c_fc.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.28.mlp.c_proj.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.28.mlp.c_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.k_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.masked_bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.out_proj.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.out_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.q_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.v_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_1.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_1.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_2.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_2.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.mlp.c_fc.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.29.mlp.c_fc.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.29.mlp.c_proj.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.29.mlp.c_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.3.attn.attention.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.k_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.masked_bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.out_proj.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.out_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.q_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.v_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_1.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_1.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_2.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_2.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.mlp.c_fc.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.3.mlp.c_fc.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.3.mlp.c_proj.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.3.mlp.c_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.30.attn.attention.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.k_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.masked_bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.out_proj.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.out_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.q_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.v_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_1.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_1.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_2.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_2.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.mlp.c_fc.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.30.mlp.c_fc.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.30.mlp.c_proj.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.30.mlp.c_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.k_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.masked_bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.out_proj.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.out_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.q_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.v_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_1.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_1.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_2.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_2.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.mlp.c_fc.bias": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.31.mlp.c_fc.weight": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.31.mlp.c_proj.bias": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.31.mlp.c_proj.weight": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.4.attn.attention.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.k_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.masked_bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.out_proj.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.out_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.q_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.v_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_1.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_1.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_2.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_2.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.mlp.c_fc.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.4.mlp.c_fc.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.4.mlp.c_proj.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.4.mlp.c_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.k_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.masked_bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.out_proj.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.out_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.q_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.v_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_1.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_1.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_2.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_2.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.mlp.c_fc.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.5.mlp.c_fc.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.5.mlp.c_proj.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.5.mlp.c_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.k_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.masked_bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.out_proj.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.out_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.q_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.v_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_1.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_1.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_2.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_2.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.mlp.c_fc.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.6.mlp.c_fc.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.6.mlp.c_proj.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.6.mlp.c_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.k_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.masked_bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.out_proj.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.out_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.q_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.v_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_1.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_1.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_2.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_2.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.mlp.c_fc.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.7.mlp.c_fc.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.7.mlp.c_proj.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.7.mlp.c_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.k_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.masked_bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.out_proj.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.out_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.q_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.v_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_1.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_1.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_2.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_2.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.mlp.c_fc.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.8.mlp.c_fc.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.8.mlp.c_proj.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.8.mlp.c_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.k_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.masked_bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.out_proj.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.out_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.q_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.v_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_1.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_1.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_2.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_2.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.mlp.c_fc.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.9.mlp.c_fc.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.9.mlp.c_proj.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.9.mlp.c_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.ln_f.bias": "pytorch_model-00034-of-00034.bin",
+    "transformer.ln_f.weight": "pytorch_model-00034-of-00034.bin",
+    "transformer.wpe.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.wte.weight": "pytorch_model-00002-of-00034.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 2048,
+  "pad_token": null,
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff