zolicsaki committed on
Commit
6ca30eb
1 Parent(s): 2e139a7

Upload folder using huggingface_hub (#1)


- b530340fe0b5c846266c1eaedfa307e4fa07f1e59464cfaaaa1400852bd798bf (f134813611a6be294845cbe269660b1a4d15b260)
- 218a6571a76d38c1673f30abc905677e4556566baf39cf412b18c8c81f2094a2 (e3022b831695da00534071f19888621217dce5b3)
- 1fedd59713ef42692ec92f1953ec6824c7933fb128a0cead96c99a13c2f4b07f (f68a369536be25c225a7f54e2021505eabb7c95a)
- df73a929a10b36bf05ddf305cbe9326b008f0649a76f9061de113eadcdd792fa (cf62f80461427f7d710ed6504f7ad3d26dbe9907)
- 01c79a611bd8e23b244d7b010e8c071080a7a801d4a223c41ff886488cb03249 (8bf74b46bf563c68853d6071ec2ee6127fa4e9b1)
- 5afaa4594fe0880cc134cacf57240f814916ca57ea8ef29530a234618f895de8 (42f4043de9194f1710534d5e618da3c15bbb4b3d)
- 2eb32e40173fdddb74d88ee7c718b77f4310d5bdb3c153dcc3664eb37a7d1b72 (cd740d04fbd36d40fcc43093ba44aec488d3fbf4)
- 27e7c21ff0846b112e8db994184225cacdf67f3c62e16372c307ce535d663cf6 (4bfdf30db0f2500c6b81516337fcdf45aea465f8)
- 92e73adf136ba7bcf9a82687afcd6839874c73a9c79fa9ed6c6c5ce0f61c00c4 (ebd33b079321c2921f035f59e46c6f3cd9653792)
- 559b5197823a007388c51f6b5f7b9e4c51c6d41c39103cb8bcd0edfc1a441996 (4bc7424bdb38f25f5257421ecef123296fa31ceb)
- a7fc1f984b5b8bc8c0f98036fe2911db208971082e046793769b838c58150136 (1c29db3bf15be51a9b712cb2e059fdf08c173c63)
- ce844f244e97d8c01e4e0553d3707a3de51ca918993aeab9e5aae25111ae2049 (bfb9d56cabd4acb7c67e52f87f909dae8655d62c)
- 9a4a9de453d1f12bc79b080d67dc740b3b15f37a5661560a13aa5804de589c66 (11d03646437fa45df538845441704fa571fa1c6b)
- 3bc07bef0b5ee2761dd5d0a7fe37c5258f12e383772860099eacc4acb8b12168 (efe147d9b793ae4a9d33af44f61763c4b2e8ad26)
- bfff81f796be5c91071e384649021b6d41db92cae8d94e5316b5c6ae8e7f4e59 (3083a414bfefb2e06e0ea5bc581efc1a6bd621dc)
- 648abb402f74a3e727d1e2a91e5cbf18db05bcf5209d36aea318f2dd1318c961 (fa61e4ac97278b7b53a2de6102ab850e76811e82)
- 82ee1e2f0a9a73d731244ff945c5de340921245f9e6c5653897dec3bd97af3c5 (80d503aed021876dbce0877c7760caa490ab418e)
- 441123ce889c15b78f50c320106809814a456cdec533ded7270c073aae041a1b (58650ccb64e30ab81437a9850d615006b88c78a2)
- e6f73d8b18141db5b5c8ab9f96c1f978d113a3f2d8b82345c9b67b50c47987dc (a26def89fc3a0f4d941a0c06d700379e68d7ff40)
- 487ef42018e1bab756b496062f2d4593fca217d7c3ba28a954d1eb41851d191d (c5cd5ecd2828b96a80fecfdabcc8d2f2c54bb320)
- 43a098dda958b1fde3f2a47fbf178e1f714f0150f4cce209ff106e77d414258c (0ed1b8f62dd20a09d2956985de19ccec8d3866e6)
- caa4002a1913eddd9ad574e2288d68c10c116322a879b734fac983b1cb1c20cd (3911463181fdff5afe4d4416c525d179eedc03b1)
- 2f4410ae6077bca6637822de410c7888d20274841db979c3745c270361369728 (3e5782e378525b38abfc56152fa7ddcdee76eebb)
- fe4cd3d605bd2f3850fd2d14ff03df4ba902679e35b80b3116b213deb7376612 (5dd912c24676fafc0fca9c62b5c9fd938c62f804)
- 92994274924869f4345eb4d5cc60aa0270217903be93e9c142caf1a3fb8f8c92 (d893583d26d849ab371b5c367dcb50c514eb0667)
- 3b4b81451184077321bebb76efe291e44f581f2571824819f4894e61a60b05c0 (b85168d6ba42aed1e63cf5fc41cf9d90a3f9a5d2)
- 2f5275652492dbc6f3d9f9258166b6079842abb5c1b018651ced376717aea758 (d8a4cbc8c8902a599e20c60a9a8ed72fcb213f02)
- c73ee6f8f1e701c0ccff2dc7ebd1596fcbfe05ddfd6c4d7d91535981264adb8f (d3ded9f6431afee73e6ed8e43c686e59fcc88ce0)
- d424eeb699352f0819e665a4845177f075d5ccab3054a55050a197240b47f096 (cda483e3b01220048b41233d03c2534b77449e28)
- 66db5d3e07bcd46a892491e93394e5b197455dcc68a27191ad270ae22b32b744 (e4c661924ffc250dc7ea58407bc83fdfe3f6f550)
- e2de5edfc19b7817affe6692659d6c400768625323167c72fb1b764a21b8457b (25ff39251f7561074b6128f6e26e7355d8f8ffc3)
- 42015264e7ba6a78ab2e875fbb779efddacf7d35d1d91591d90e80ca5182f2f8 (5949986e333a1407b8106b318dcbc0ba6eb81a0f)
- 47d50287e1a1acfa29581ae3c62dba72fa2501d8e62c0dbd58d7360b3c4fa6b7 (268944dd65b652bfeeb4186a002389a7a857fa2b)
- 2421879d89067514dbb9b677abbe55d8c1546a97469a4a822428d2dbd1a074a9 (0e7f61b7ce2468a920e56c3f60d763740ac99fbb)
- 337ee36068844f9d5f2ee004a6cddd798087de9baca05bd9b80c20463233e92c (474fc96ce424895b00a754358681db9c4b59c351)
- 913686eaf9f4533f0e5b2802d680044e3788ce95339109f061c0d3002c5a5ee0 (835d05e28f612c36d2d975ce42763a0edacca11f)

added_tokens.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "</s>": 2,
+ "<s>": 1,
+ "<unk>": 0
+ }
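The three entries above pin the SentencePiece special-token ids. A minimal check that a tokenizer loaded from this folder agrees with the mapping; the checkpoint path is a placeholder, not part of this commit:

```python
from transformers import AutoTokenizer

# Placeholder path; point it at a local clone of this repository.
tokenizer = AutoTokenizer.from_pretrained("path/to/this-checkpoint")

# added_tokens.json above pins <unk>=0, <s>=1, </s>=2.
assert tokenizer.convert_tokens_to_ids("<unk>") == 0
assert tokenizer.convert_tokens_to_ids("<s>") == 1
assert tokenizer.convert_tokens_to_ids("</s>") == 2
```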
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "_name_or_path": "/import/ml-sc-nlpcheckpoints-scratch3/zoltanc/70b_international_llamas/hu_4000_ckpt_bin/config.json",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 8192,
+ "initializer_range": 0.02,
+ "intermediate_size": 28672,
+ "max_position_embeddings": 4096,
+ "model_name": "",
+ "model_type": "llama",
+ "num_attention_heads": 64,
+ "num_hidden_layers": 80,
+ "num_key_value_heads": 8,
+ "pad_token_id": 0,
+ "pretraining_tp": 1,
+ "return_dict": false,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.31.0",
+ "use_cache": false,
+ "vocab_size": 57344
+ }
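The config describes an 80-layer Llama with grouped-query attention (64 query heads over 8 key/value heads) and a 57,344-token vocabulary, stored in float32. A minimal sketch that rebuilds the same LlamaConfig locally to inspect the derived shapes:

```python
from transformers import LlamaConfig

config = LlamaConfig(
    hidden_size=8192,
    intermediate_size=28672,
    num_hidden_layers=80,
    num_attention_heads=64,
    num_key_value_heads=8,  # grouped-query attention: 8 KV heads serve 64 query heads
    max_position_embeddings=4096,
    vocab_size=57344,
    rms_norm_eps=1e-05,
    rope_theta=10000.0,
    hidden_act="silu",
    bos_token_id=1,
    eos_token_id=2,
    pad_token_id=0,
)

print(config.hidden_size // config.num_attention_heads)          # 128: per-head dimension
print(config.num_attention_heads // config.num_key_value_heads)  # 8 query heads per KV head
```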
pytorch_model-01-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc67a605d23bfab9f6d5128c0837cc17f1be2240e04eab5e5c23c44877d44c07
+ size 3892352979
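Each of the 35 pytorch_model-*.bin entries in this commit is stored via Git LFS, so the diff shows a three-line pointer (spec version, SHA-256 of the payload, size in bytes) rather than the weights themselves. A small sketch parsing that format, using shard 01's pointer as input:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file into its version, oid, and size fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:cc67a605d23bfab9f6d5128c0837cc17f1be2240e04eab5e5c23c44877d44c07
size 3892352979"""

info = parse_lfs_pointer(pointer)
print(info["size"])  # 3892352979 bytes, roughly 3.9 GB for shard 01
```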
pytorch_model-02-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9ea9ab811082ebbc8c15128491f2aee6aa4d76068f34e819dd1f0e64d544701
+ size 4194411495
pytorch_model-03-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2716805ef2d1be5600a118bd09d04cdcaae127f55d960e319a77953dc0df49a0
+ size 3892386649
pytorch_model-04-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d24f8dfc897dc008012c7f28e494083a37b6bd62058bdc87352a5a8cb1653ba
+ size 3892386649
pytorch_model-05-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d5672fe22816ffcd9b91dc584439c853f0325e39bdbe11079fc65fb999d53ae
+ size 4194411495
pytorch_model-06-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f0fd1bac4139a8c38d946e03f8fc2ac8e949983e4f756040b0a36857651f70ab
+ size 3892386649
pytorch_model-07-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52e6b7c20ffb0cafd5a512b5bb8a08d50b7f4275419b164cafbe76595130d843
+ size 3892386649
pytorch_model-08-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a30fe09ae53f0e4d65c0e3385ee336df4bca2fa642c9e6aacf1828753feed1fa
+ size 4194411495
pytorch_model-09-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c9aadc1651baa6415fd58f7cd9ad97d533ae76c92a64db7fb85c51320eed537
+ size 3892386649
pytorch_model-10-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2012c1dbcdede5ab6b11f13e0fbe0b2522303d15315f413d0d24fe000497a1e
+ size 3892386649
pytorch_model-11-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4a0a83eb5d7d3ce248bd3be9e3296e87e1c3dcd82b773d2104adffe46295ef74
+ size 4194411589
pytorch_model-12-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:199c91bdb43fd0e26681b1f2963994cfbd84ee22a8f8aef2a252b18b14341a39
+ size 3892386672
pytorch_model-13-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a5a8f6869f11d6a20f59e512e3c3e25f14e24d10d96f1295f3461cdc0a9407a
+ size 3892386672
pytorch_model-14-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04654020bf232ae3912c357edebabbec83d032ab6ca272acf2011a0f8f0bfea1
+ size 4194411589
pytorch_model-15-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c33904ce89b72b02a257185fa8597eb0b950e0bdc186b6cf9a4bbd37c08f5252
+ size 3892386672
pytorch_model-16-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d8bfcc841ccd9fa41cad1b4b1241572d9ec96bb5f83104a94fcc02647477235
+ size 3892386672
pytorch_model-17-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae1655e7ef1309156ccc4f917a5b886e50956fa46421c8fdd286336238a5f653
+ size 4194411589
pytorch_model-18-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cae89b122553b1df235c42f153945600815f860d9e5be2ae46bbf90839427b6f
+ size 3892386672
pytorch_model-19-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26937bf020ed9dc0c122faf4dfc654340df9ebbe3369d5cad5d9330a9ebff42e
+ size 3892386672
pytorch_model-20-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f294fdb4fc91753777ce2ea029d09b893870c17882deef698faedf0a7311c74f
+ size 4194411589
pytorch_model-21-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0fb2ba953910de2d4504e9c4a83f4b8b90354455f513142e11a2b771b3564a12
+ size 3892386672
pytorch_model-22-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49e7957d6fd9ca714d9038ac16bb8f03841400710793768c1db8c87f56e04d22
+ size 3892386672
pytorch_model-23-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf20590b2dabb1f7d053c1731a19761b63a59805054a441d423e451f0d38be7f
+ size 4194411589
pytorch_model-24-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e599cdeed44d96ac35f00e3cb467ee7c184a97b3e8daad137e8293cf3d2c6b9d
+ size 3892386672
pytorch_model-25-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31cbaa7e9131e62ea24901c3de8d70c4c45c7452e2549c3703c447c705dd5844
+ size 3892386672
pytorch_model-26-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99d5c77715998bf6cf22596354ab29421ae63dd1bf3637f31735112c0aaefa8f
+ size 4194411589
pytorch_model-27-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3825d1c2eb4d48f8f0c75584af9a7db926f6c16b3e361f837824580472b1c366
+ size 3892386672
pytorch_model-28-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b4169ef02dbd36304e5acd6620dcd1a53dd554e5c3ad54cc678ff885a95325f6
+ size 3892386672
pytorch_model-29-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6c76453ea65cfb244d68eed7df88e25a2362734bf5c518be62ba4fe5165c531
+ size 4194411589
pytorch_model-30-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3e8529dfd073a6b132978351b078c33a3b06fc09e0695c89b54c5b2e876b4aa
+ size 3892386672
pytorch_model-31-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2105d4915d49301884862b53522fb8ad82d9e48b9d4470c1f2fd33cc3b861d62
+ size 3892386672
pytorch_model-32-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d1542db2a46ee332d763ab9819c24689c636bfaae99f5ab6c307568cb89132ba
+ size 4194411589
pytorch_model-33-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5044a56ad83b1b581d755822b060b815714f0b337048b104ee68c830febb9a33
+ size 3892386672
pytorch_model-34-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d800719e5a8201fc35a525cae54343a40c550180c462d603cde41eae4731326
+ size 3892386672
pytorch_model-35-of-35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a1c9f20fc5bad4444eeb03adced7f6b3181b6c13aa9931aa82fa26b97564ec4
+ size 3120649182
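The pytorch_model.bin.index.json that follows maps every parameter name to the shard that holds it, so a loader can open only the shards it needs. A minimal sketch of that lookup, assuming the shards and index sit together in a local directory:

```python
import json
import os

import torch

checkpoint_dir = "path/to/this-checkpoint"  # placeholder local path

with open(os.path.join(checkpoint_dir, "pytorch_model.bin.index.json")) as f:
    weight_map = json.load(f)["weight_map"]

# Locate the shard holding the embedding matrix and load only that file.
shard_file = weight_map["model.embed_tokens.weight"]  # "pytorch_model-01-of-35.bin"
state = torch.load(os.path.join(checkpoint_dir, shard_file), map_location="cpu")
print(state["model.embed_tokens.weight"].shape)  # torch.Size([57344, 8192])
```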
pytorch_model.bin.index.json ADDED
@@ -0,0 +1 @@
+ {"metadata": {}, "weight_map": {"model.embed_tokens.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.q_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.k_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.v_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.o_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-01-of-35.bin", "model.layers.0.mlp.gate_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.mlp.up_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.mlp.down_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.0.input_layernorm.weight": "pytorch_model-01-of-35.bin", "model.layers.0.post_attention_layernorm.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.q_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.k_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.v_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.o_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-01-of-35.bin", "model.layers.1.mlp.gate_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.mlp.up_proj.weight": "pytorch_model-01-of-35.bin", "model.layers.1.mlp.down_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.1.input_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.1.post_attention_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.q_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.k_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.v_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.o_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-02-of-35.bin", "model.layers.2.mlp.gate_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.mlp.up_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.mlp.down_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.2.input_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.2.post_attention_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.q_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.k_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.v_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.o_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-02-of-35.bin", "model.layers.3.mlp.gate_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.mlp.up_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.mlp.down_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.3.input_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.3.post_attention_layernorm.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.q_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.k_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.v_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.o_proj.weight": "pytorch_model-02-of-35.bin", "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-02-of-35.bin", "model.layers.4.mlp.gate_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.4.mlp.up_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.4.mlp.down_proj.weight": "pytorch_model-03-of-35.bin", 
"model.layers.4.input_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.4.post_attention_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.q_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.k_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.v_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.o_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-03-of-35.bin", "model.layers.5.mlp.gate_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.mlp.up_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.mlp.down_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.5.input_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.5.post_attention_layernorm.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.q_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.k_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.v_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.o_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-03-of-35.bin", "model.layers.6.mlp.gate_proj.weight": "pytorch_model-03-of-35.bin", "model.layers.6.mlp.up_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.6.mlp.down_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.6.input_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.6.post_attention_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.q_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.k_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.v_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.o_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-04-of-35.bin", "model.layers.7.mlp.gate_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.mlp.up_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.mlp.down_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.7.input_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.7.post_attention_layernorm.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.q_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.k_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.v_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.o_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-04-of-35.bin", "model.layers.8.mlp.gate_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.mlp.up_proj.weight": "pytorch_model-04-of-35.bin", "model.layers.8.mlp.down_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.8.input_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.8.post_attention_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.q_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.k_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.v_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.o_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-05-of-35.bin", "model.layers.9.mlp.gate_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.mlp.up_proj.weight": "pytorch_model-05-of-35.bin", 
"model.layers.9.mlp.down_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.9.input_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.9.post_attention_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.q_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.k_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.v_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.o_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-05-of-35.bin", "model.layers.10.mlp.gate_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.mlp.up_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.mlp.down_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.10.input_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.10.post_attention_layernorm.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.q_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.k_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.v_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.o_proj.weight": "pytorch_model-05-of-35.bin", "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-05-of-35.bin", "model.layers.11.mlp.gate_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.11.mlp.up_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.11.mlp.down_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.11.input_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.11.post_attention_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.q_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.k_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.v_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.o_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-06-of-35.bin", "model.layers.12.mlp.gate_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.mlp.up_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.mlp.down_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.12.input_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.12.post_attention_layernorm.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.q_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.k_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.v_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.o_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-06-of-35.bin", "model.layers.13.mlp.gate_proj.weight": "pytorch_model-06-of-35.bin", "model.layers.13.mlp.up_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.13.mlp.down_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.13.input_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.13.post_attention_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.q_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.k_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.v_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.o_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-07-of-35.bin", 
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.mlp.up_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.mlp.down_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.14.input_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.14.post_attention_layernorm.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.q_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.k_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.v_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.o_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-07-of-35.bin", "model.layers.15.mlp.gate_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.mlp.up_proj.weight": "pytorch_model-07-of-35.bin", "model.layers.15.mlp.down_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.15.input_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.15.post_attention_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.q_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.k_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.v_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.o_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-08-of-35.bin", "model.layers.16.mlp.gate_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.mlp.up_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.mlp.down_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.16.input_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.16.post_attention_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.q_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.k_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.v_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.o_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-08-of-35.bin", "model.layers.17.mlp.gate_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.mlp.up_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.mlp.down_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.17.input_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.17.post_attention_layernorm.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.q_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.k_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.v_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.o_proj.weight": "pytorch_model-08-of-35.bin", "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-08-of-35.bin", "model.layers.18.mlp.gate_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.18.mlp.up_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.18.mlp.down_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.18.input_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.18.post_attention_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.q_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.k_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.v_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.self_attn.o_proj.weight": 
"pytorch_model-09-of-35.bin", "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-09-of-35.bin", "model.layers.19.mlp.gate_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.mlp.up_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.mlp.down_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.19.input_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.19.post_attention_layernorm.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.q_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.k_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.v_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.o_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-09-of-35.bin", "model.layers.20.mlp.gate_proj.weight": "pytorch_model-09-of-35.bin", "model.layers.20.mlp.up_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.20.mlp.down_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.20.input_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.20.post_attention_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.q_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.k_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.v_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.o_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-10-of-35.bin", "model.layers.21.mlp.gate_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.mlp.up_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.mlp.down_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.21.input_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.21.post_attention_layernorm.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.q_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.k_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.v_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.o_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-10-of-35.bin", "model.layers.22.mlp.gate_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.mlp.up_proj.weight": "pytorch_model-10-of-35.bin", "model.layers.22.mlp.down_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.22.input_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.22.post_attention_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.q_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.k_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.v_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.o_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-11-of-35.bin", "model.layers.23.mlp.gate_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.mlp.up_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.mlp.down_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.23.input_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.23.post_attention_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.q_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.k_proj.weight": "pytorch_model-11-of-35.bin", 
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.o_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-11-of-35.bin", "model.layers.24.mlp.gate_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.mlp.up_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.mlp.down_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.24.input_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.24.post_attention_layernorm.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.q_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.k_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.v_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.o_proj.weight": "pytorch_model-11-of-35.bin", "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-11-of-35.bin", "model.layers.25.mlp.gate_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.25.mlp.up_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.25.mlp.down_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.25.input_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.25.post_attention_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.q_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.k_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.v_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.o_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-12-of-35.bin", "model.layers.26.mlp.gate_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.mlp.up_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.mlp.down_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.26.input_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.26.post_attention_layernorm.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.q_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.k_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.v_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.o_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-12-of-35.bin", "model.layers.27.mlp.gate_proj.weight": "pytorch_model-12-of-35.bin", "model.layers.27.mlp.up_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.27.mlp.down_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.27.input_layernorm.weight": "pytorch_model-13-of-35.bin", "model.layers.27.post_attention_layernorm.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.q_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.k_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.v_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.o_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-13-of-35.bin", "model.layers.28.mlp.gate_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.mlp.up_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.mlp.down_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.28.input_layernorm.weight": "pytorch_model-13-of-35.bin", "model.layers.28.post_attention_layernorm.weight": "pytorch_model-13-of-35.bin", 
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.k_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.v_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.o_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-13-of-35.bin", "model.layers.29.mlp.gate_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.mlp.up_proj.weight": "pytorch_model-13-of-35.bin", "model.layers.29.mlp.down_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.29.input_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.29.post_attention_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.q_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.k_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.v_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.o_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-14-of-35.bin", "model.layers.30.mlp.gate_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.mlp.up_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.mlp.down_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.30.input_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.30.post_attention_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.q_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.k_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.v_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.o_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-14-of-35.bin", "model.layers.31.mlp.gate_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.mlp.up_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.mlp.down_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.31.input_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.31.post_attention_layernorm.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.q_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.k_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.v_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.o_proj.weight": "pytorch_model-14-of-35.bin", "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-14-of-35.bin", "model.layers.32.mlp.gate_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.32.mlp.up_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.32.mlp.down_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.32.input_layernorm.weight": "pytorch_model-15-of-35.bin", "model.layers.32.post_attention_layernorm.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.q_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.k_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.v_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.o_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-15-of-35.bin", "model.layers.33.mlp.gate_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.mlp.up_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.mlp.down_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.33.input_layernorm.weight": 
"pytorch_model-15-of-35.bin", "model.layers.33.post_attention_layernorm.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.q_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.k_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.v_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.o_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-15-of-35.bin", "model.layers.34.mlp.gate_proj.weight": "pytorch_model-15-of-35.bin", "model.layers.34.mlp.up_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.34.mlp.down_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.34.input_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.34.post_attention_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.q_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.k_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.v_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.o_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-16-of-35.bin", "model.layers.35.mlp.gate_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.mlp.up_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.mlp.down_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.35.input_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.35.post_attention_layernorm.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.q_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.k_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.v_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.o_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-16-of-35.bin", "model.layers.36.mlp.gate_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.mlp.up_proj.weight": "pytorch_model-16-of-35.bin", "model.layers.36.mlp.down_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.36.input_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.36.post_attention_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.q_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.k_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.v_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.o_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-17-of-35.bin", "model.layers.37.mlp.gate_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.mlp.up_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.mlp.down_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.37.input_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.37.post_attention_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.q_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.k_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.v_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.o_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-17-of-35.bin", "model.layers.38.mlp.gate_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.mlp.up_proj.weight": "pytorch_model-17-of-35.bin", 
"model.layers.38.mlp.down_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.38.input_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.38.post_attention_layernorm.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.q_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.k_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.v_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.o_proj.weight": "pytorch_model-17-of-35.bin", "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-17-of-35.bin", "model.layers.39.mlp.gate_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.39.mlp.up_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.39.mlp.down_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.39.input_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.39.post_attention_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.q_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.k_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.v_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.o_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.self_attn.rotary_emb.inv_freq": "pytorch_model-18-of-35.bin", "model.layers.40.mlp.gate_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.mlp.up_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.mlp.down_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.40.input_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.40.post_attention_layernorm.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.q_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.k_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.v_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.o_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.self_attn.rotary_emb.inv_freq": "pytorch_model-18-of-35.bin", "model.layers.41.mlp.gate_proj.weight": "pytorch_model-18-of-35.bin", "model.layers.41.mlp.up_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.41.mlp.down_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.41.input_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.41.post_attention_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.q_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.k_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.v_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.o_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.self_attn.rotary_emb.inv_freq": "pytorch_model-19-of-35.bin", "model.layers.42.mlp.gate_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.mlp.up_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.mlp.down_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.42.input_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.42.post_attention_layernorm.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.q_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.k_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.v_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.o_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.self_attn.rotary_emb.inv_freq": "pytorch_model-19-of-35.bin", 
"model.layers.43.mlp.gate_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.mlp.up_proj.weight": "pytorch_model-19-of-35.bin", "model.layers.43.mlp.down_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.43.input_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.43.post_attention_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.q_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.k_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.v_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.o_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.self_attn.rotary_emb.inv_freq": "pytorch_model-20-of-35.bin", "model.layers.44.mlp.gate_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.mlp.up_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.mlp.down_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.44.input_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.44.post_attention_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.q_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.k_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.v_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.o_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.self_attn.rotary_emb.inv_freq": "pytorch_model-20-of-35.bin", "model.layers.45.mlp.gate_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.mlp.up_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.mlp.down_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.45.input_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.45.post_attention_layernorm.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.q_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.k_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.v_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.o_proj.weight": "pytorch_model-20-of-35.bin", "model.layers.46.self_attn.rotary_emb.inv_freq": "pytorch_model-20-of-35.bin", "model.layers.46.mlp.gate_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.46.mlp.up_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.46.mlp.down_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.46.input_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.46.post_attention_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.q_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.k_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.v_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.o_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.self_attn.rotary_emb.inv_freq": "pytorch_model-21-of-35.bin", "model.layers.47.mlp.gate_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.mlp.up_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.mlp.down_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.47.input_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.47.post_attention_layernorm.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.q_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.k_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.v_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.self_attn.o_proj.weight": 
"pytorch_model-21-of-35.bin", "model.layers.48.self_attn.rotary_emb.inv_freq": "pytorch_model-21-of-35.bin", "model.layers.48.mlp.gate_proj.weight": "pytorch_model-21-of-35.bin", "model.layers.48.mlp.up_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.48.mlp.down_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.48.input_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.48.post_attention_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.q_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.k_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.v_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.o_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.self_attn.rotary_emb.inv_freq": "pytorch_model-22-of-35.bin", "model.layers.49.mlp.gate_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.mlp.up_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.mlp.down_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.49.input_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.49.post_attention_layernorm.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.q_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.k_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.v_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.o_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.self_attn.rotary_emb.inv_freq": "pytorch_model-22-of-35.bin", "model.layers.50.mlp.gate_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.mlp.up_proj.weight": "pytorch_model-22-of-35.bin", "model.layers.50.mlp.down_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.50.input_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.50.post_attention_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.q_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.k_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.v_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.o_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.self_attn.rotary_emb.inv_freq": "pytorch_model-23-of-35.bin", "model.layers.51.mlp.gate_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.mlp.up_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.mlp.down_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.51.input_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.51.post_attention_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.q_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.k_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.v_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.o_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.self_attn.rotary_emb.inv_freq": "pytorch_model-23-of-35.bin", "model.layers.52.mlp.gate_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.mlp.up_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.mlp.down_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.52.input_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.52.post_attention_layernorm.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.q_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.k_proj.weight": "pytorch_model-23-of-35.bin", 
"model.layers.53.self_attn.v_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.o_proj.weight": "pytorch_model-23-of-35.bin", "model.layers.53.self_attn.rotary_emb.inv_freq": "pytorch_model-23-of-35.bin", "model.layers.53.mlp.gate_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.53.mlp.up_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.53.mlp.down_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.53.input_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.53.post_attention_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.q_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.k_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.v_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.o_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.self_attn.rotary_emb.inv_freq": "pytorch_model-24-of-35.bin", "model.layers.54.mlp.gate_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.mlp.up_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.mlp.down_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.54.input_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.54.post_attention_layernorm.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.q_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.k_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.v_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.o_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.self_attn.rotary_emb.inv_freq": "pytorch_model-24-of-35.bin", "model.layers.55.mlp.gate_proj.weight": "pytorch_model-24-of-35.bin", "model.layers.55.mlp.up_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.55.mlp.down_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.55.input_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.55.post_attention_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.q_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.k_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.v_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.o_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.self_attn.rotary_emb.inv_freq": "pytorch_model-25-of-35.bin", "model.layers.56.mlp.gate_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.mlp.up_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.mlp.down_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.56.input_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.56.post_attention_layernorm.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.q_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.k_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.v_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.o_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.self_attn.rotary_emb.inv_freq": "pytorch_model-25-of-35.bin", "model.layers.57.mlp.gate_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.mlp.up_proj.weight": "pytorch_model-25-of-35.bin", "model.layers.57.mlp.down_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.57.input_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.57.post_attention_layernorm.weight": "pytorch_model-26-of-35.bin", 
"model.layers.58.self_attn.q_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.k_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.v_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.o_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.self_attn.rotary_emb.inv_freq": "pytorch_model-26-of-35.bin", "model.layers.58.mlp.gate_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.mlp.up_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.mlp.down_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.58.input_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.58.post_attention_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.q_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.k_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.v_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.o_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.self_attn.rotary_emb.inv_freq": "pytorch_model-26-of-35.bin", "model.layers.59.mlp.gate_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.mlp.up_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.mlp.down_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.59.input_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.59.post_attention_layernorm.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.q_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.k_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.v_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.o_proj.weight": "pytorch_model-26-of-35.bin", "model.layers.60.self_attn.rotary_emb.inv_freq": "pytorch_model-26-of-35.bin", "model.layers.60.mlp.gate_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.60.mlp.up_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.60.mlp.down_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.60.input_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.60.post_attention_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.q_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.k_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.v_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.o_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.self_attn.rotary_emb.inv_freq": "pytorch_model-27-of-35.bin", "model.layers.61.mlp.gate_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.mlp.up_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.mlp.down_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.61.input_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.61.post_attention_layernorm.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.q_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.k_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.v_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.o_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.self_attn.rotary_emb.inv_freq": "pytorch_model-27-of-35.bin", "model.layers.62.mlp.gate_proj.weight": "pytorch_model-27-of-35.bin", "model.layers.62.mlp.up_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.62.mlp.down_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.62.input_layernorm.weight": 
"pytorch_model-28-of-35.bin", "model.layers.62.post_attention_layernorm.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.q_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.k_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.v_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.o_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.self_attn.rotary_emb.inv_freq": "pytorch_model-28-of-35.bin", "model.layers.63.mlp.gate_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.mlp.up_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.mlp.down_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.63.input_layernorm.weight": "pytorch_model-28-of-35.bin", "model.layers.63.post_attention_layernorm.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.q_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.k_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.v_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.o_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.self_attn.rotary_emb.inv_freq": "pytorch_model-28-of-35.bin", "model.layers.64.mlp.gate_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.mlp.up_proj.weight": "pytorch_model-28-of-35.bin", "model.layers.64.mlp.down_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.64.input_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.64.post_attention_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.q_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.k_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.v_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.o_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.self_attn.rotary_emb.inv_freq": "pytorch_model-29-of-35.bin", "model.layers.65.mlp.gate_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.mlp.up_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.mlp.down_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.65.input_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.65.post_attention_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.q_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.k_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.v_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.o_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.self_attn.rotary_emb.inv_freq": "pytorch_model-29-of-35.bin", "model.layers.66.mlp.gate_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.mlp.up_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.mlp.down_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.66.input_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.66.post_attention_layernorm.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.q_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.k_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.v_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.o_proj.weight": "pytorch_model-29-of-35.bin", "model.layers.67.self_attn.rotary_emb.inv_freq": "pytorch_model-29-of-35.bin", "model.layers.67.mlp.gate_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.67.mlp.up_proj.weight": "pytorch_model-30-of-35.bin", 
"model.layers.67.mlp.down_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.67.input_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.67.post_attention_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.q_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.k_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.v_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.o_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.self_attn.rotary_emb.inv_freq": "pytorch_model-30-of-35.bin", "model.layers.68.mlp.gate_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.mlp.up_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.mlp.down_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.68.input_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.68.post_attention_layernorm.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.q_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.k_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.v_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.o_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.self_attn.rotary_emb.inv_freq": "pytorch_model-30-of-35.bin", "model.layers.69.mlp.gate_proj.weight": "pytorch_model-30-of-35.bin", "model.layers.69.mlp.up_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.69.mlp.down_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.69.input_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.69.post_attention_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.q_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.k_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.v_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.o_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.self_attn.rotary_emb.inv_freq": "pytorch_model-31-of-35.bin", "model.layers.70.mlp.gate_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.mlp.up_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.mlp.down_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.70.input_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.70.post_attention_layernorm.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.q_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.k_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.v_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.o_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.self_attn.rotary_emb.inv_freq": "pytorch_model-31-of-35.bin", "model.layers.71.mlp.gate_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.mlp.up_proj.weight": "pytorch_model-31-of-35.bin", "model.layers.71.mlp.down_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.71.input_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.71.post_attention_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.q_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.k_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.v_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.o_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.self_attn.rotary_emb.inv_freq": "pytorch_model-32-of-35.bin", 
"model.layers.72.mlp.gate_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.mlp.up_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.mlp.down_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.72.input_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.72.post_attention_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.q_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.k_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.v_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.o_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.self_attn.rotary_emb.inv_freq": "pytorch_model-32-of-35.bin", "model.layers.73.mlp.gate_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.mlp.up_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.mlp.down_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.73.input_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.73.post_attention_layernorm.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.q_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.k_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.v_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.o_proj.weight": "pytorch_model-32-of-35.bin", "model.layers.74.self_attn.rotary_emb.inv_freq": "pytorch_model-32-of-35.bin", "model.layers.74.mlp.gate_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.74.mlp.up_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.74.mlp.down_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.74.input_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.74.post_attention_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.q_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.k_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.v_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.o_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.self_attn.rotary_emb.inv_freq": "pytorch_model-33-of-35.bin", "model.layers.75.mlp.gate_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.mlp.up_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.mlp.down_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.75.input_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.75.post_attention_layernorm.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.q_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.k_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.v_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.o_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.self_attn.rotary_emb.inv_freq": "pytorch_model-33-of-35.bin", "model.layers.76.mlp.gate_proj.weight": "pytorch_model-33-of-35.bin", "model.layers.76.mlp.up_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.76.mlp.down_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.76.input_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.76.post_attention_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.q_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.k_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.v_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.self_attn.o_proj.weight": 
"pytorch_model-34-of-35.bin", "model.layers.77.self_attn.rotary_emb.inv_freq": "pytorch_model-34-of-35.bin", "model.layers.77.mlp.gate_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.mlp.up_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.mlp.down_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.77.input_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.77.post_attention_layernorm.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.q_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.k_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.v_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.o_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.self_attn.rotary_emb.inv_freq": "pytorch_model-34-of-35.bin", "model.layers.78.mlp.gate_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.mlp.up_proj.weight": "pytorch_model-34-of-35.bin", "model.layers.78.mlp.down_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.78.input_layernorm.weight": "pytorch_model-35-of-35.bin", "model.layers.78.post_attention_layernorm.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.q_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.k_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.v_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.o_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.self_attn.rotary_emb.inv_freq": "pytorch_model-35-of-35.bin", "model.layers.79.mlp.gate_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.mlp.up_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.mlp.down_proj.weight": "pytorch_model-35-of-35.bin", "model.layers.79.input_layernorm.weight": "pytorch_model-35-of-35.bin", "model.layers.79.post_attention_layernorm.weight": "pytorch_model-35-of-35.bin", "model.norm.weight": "pytorch_model-35-of-35.bin", "lm_head.weight": "pytorch_model-35-of-35.bin"}}
pytorch_model.bin.sambatensor_index.json ADDED
@@ -0,0 +1 @@
+ {"model.embed_tokens.weight": "llamaforcausallm__model__embed_tokens__weight", "model.layers.0.self_attn.q_proj.weight": "llamaforcausallm__model__layers__0__self_attn__q_proj__weight", "model.layers.0.self_attn.k_proj.weight": "llamaforcausallm__model__layers__0__self_attn__k_proj__weight", "model.layers.0.self_attn.v_proj.weight": "llamaforcausallm__model__layers__0__self_attn__v_proj__weight", "model.layers.0.self_attn.o_proj.weight": "llamaforcausallm__model__layers__0__self_attn__o_proj__weight", "model.layers.0.mlp.gate_proj.weight": "llamaforcausallm__model__layers__0__mlp__gate_proj__weight", "model.layers.0.mlp.up_proj.weight": "llamaforcausallm__model__layers__0__mlp__up_proj__weight", "model.layers.0.mlp.down_proj.weight": "llamaforcausallm__model__layers__0__mlp__down_proj__weight", "model.layers.0.input_layernorm.weight": "llamaforcausallm__model__layers__0__input_layernorm__weight", "model.layers.0.post_attention_layernorm.weight": "llamaforcausallm__model__layers__0__post_attention_layernorm__weight", "model.layers.1.self_attn.q_proj.weight": "llamaforcausallm__model__layers__1__self_attn__q_proj__weight", "model.layers.1.self_attn.k_proj.weight": "llamaforcausallm__model__layers__1__self_attn__k_proj__weight", "model.layers.1.self_attn.v_proj.weight": "llamaforcausallm__model__layers__1__self_attn__v_proj__weight", "model.layers.1.self_attn.o_proj.weight": "llamaforcausallm__model__layers__1__self_attn__o_proj__weight", "model.layers.1.mlp.gate_proj.weight": "llamaforcausallm__model__layers__1__mlp__gate_proj__weight", "model.layers.1.mlp.up_proj.weight": "llamaforcausallm__model__layers__1__mlp__up_proj__weight", "model.layers.1.mlp.down_proj.weight": "llamaforcausallm__model__layers__1__mlp__down_proj__weight", "model.layers.1.input_layernorm.weight": "llamaforcausallm__model__layers__1__input_layernorm__weight", "model.layers.1.post_attention_layernorm.weight": "llamaforcausallm__model__layers__1__post_attention_layernorm__weight", "model.layers.2.self_attn.q_proj.weight": "llamaforcausallm__model__layers__2__self_attn__q_proj__weight", "model.layers.2.self_attn.k_proj.weight": "llamaforcausallm__model__layers__2__self_attn__k_proj__weight", "model.layers.2.self_attn.v_proj.weight": "llamaforcausallm__model__layers__2__self_attn__v_proj__weight", "model.layers.2.self_attn.o_proj.weight": "llamaforcausallm__model__layers__2__self_attn__o_proj__weight", "model.layers.2.mlp.gate_proj.weight": "llamaforcausallm__model__layers__2__mlp__gate_proj__weight", "model.layers.2.mlp.up_proj.weight": "llamaforcausallm__model__layers__2__mlp__up_proj__weight", "model.layers.2.mlp.down_proj.weight": "llamaforcausallm__model__layers__2__mlp__down_proj__weight", "model.layers.2.input_layernorm.weight": "llamaforcausallm__model__layers__2__input_layernorm__weight", "model.layers.2.post_attention_layernorm.weight": "llamaforcausallm__model__layers__2__post_attention_layernorm__weight", "model.layers.3.self_attn.q_proj.weight": "llamaforcausallm__model__layers__3__self_attn__q_proj__weight", "model.layers.3.self_attn.k_proj.weight": "llamaforcausallm__model__layers__3__self_attn__k_proj__weight", "model.layers.3.self_attn.v_proj.weight": "llamaforcausallm__model__layers__3__self_attn__v_proj__weight", "model.layers.3.self_attn.o_proj.weight": "llamaforcausallm__model__layers__3__self_attn__o_proj__weight", "model.layers.3.mlp.gate_proj.weight": "llamaforcausallm__model__layers__3__mlp__gate_proj__weight", "model.layers.3.mlp.up_proj.weight": 
"llamaforcausallm__model__layers__3__mlp__up_proj__weight", "model.layers.3.mlp.down_proj.weight": "llamaforcausallm__model__layers__3__mlp__down_proj__weight", "model.layers.3.input_layernorm.weight": "llamaforcausallm__model__layers__3__input_layernorm__weight", "model.layers.3.post_attention_layernorm.weight": "llamaforcausallm__model__layers__3__post_attention_layernorm__weight", "model.layers.4.self_attn.q_proj.weight": "llamaforcausallm__model__layers__4__self_attn__q_proj__weight", "model.layers.4.self_attn.k_proj.weight": "llamaforcausallm__model__layers__4__self_attn__k_proj__weight", "model.layers.4.self_attn.v_proj.weight": "llamaforcausallm__model__layers__4__self_attn__v_proj__weight", "model.layers.4.self_attn.o_proj.weight": "llamaforcausallm__model__layers__4__self_attn__o_proj__weight", "model.layers.4.mlp.gate_proj.weight": "llamaforcausallm__model__layers__4__mlp__gate_proj__weight", "model.layers.4.mlp.up_proj.weight": "llamaforcausallm__model__layers__4__mlp__up_proj__weight", "model.layers.4.mlp.down_proj.weight": "llamaforcausallm__model__layers__4__mlp__down_proj__weight", "model.layers.4.input_layernorm.weight": "llamaforcausallm__model__layers__4__input_layernorm__weight", "model.layers.4.post_attention_layernorm.weight": "llamaforcausallm__model__layers__4__post_attention_layernorm__weight", "model.layers.5.self_attn.q_proj.weight": "llamaforcausallm__model__layers__5__self_attn__q_proj__weight", "model.layers.5.self_attn.k_proj.weight": "llamaforcausallm__model__layers__5__self_attn__k_proj__weight", "model.layers.5.self_attn.v_proj.weight": "llamaforcausallm__model__layers__5__self_attn__v_proj__weight", "model.layers.5.self_attn.o_proj.weight": "llamaforcausallm__model__layers__5__self_attn__o_proj__weight", "model.layers.5.mlp.gate_proj.weight": "llamaforcausallm__model__layers__5__mlp__gate_proj__weight", "model.layers.5.mlp.up_proj.weight": "llamaforcausallm__model__layers__5__mlp__up_proj__weight", "model.layers.5.mlp.down_proj.weight": "llamaforcausallm__model__layers__5__mlp__down_proj__weight", "model.layers.5.input_layernorm.weight": "llamaforcausallm__model__layers__5__input_layernorm__weight", "model.layers.5.post_attention_layernorm.weight": "llamaforcausallm__model__layers__5__post_attention_layernorm__weight", "model.layers.6.self_attn.q_proj.weight": "llamaforcausallm__model__layers__6__self_attn__q_proj__weight", "model.layers.6.self_attn.k_proj.weight": "llamaforcausallm__model__layers__6__self_attn__k_proj__weight", "model.layers.6.self_attn.v_proj.weight": "llamaforcausallm__model__layers__6__self_attn__v_proj__weight", "model.layers.6.self_attn.o_proj.weight": "llamaforcausallm__model__layers__6__self_attn__o_proj__weight", "model.layers.6.mlp.gate_proj.weight": "llamaforcausallm__model__layers__6__mlp__gate_proj__weight", "model.layers.6.mlp.up_proj.weight": "llamaforcausallm__model__layers__6__mlp__up_proj__weight", "model.layers.6.mlp.down_proj.weight": "llamaforcausallm__model__layers__6__mlp__down_proj__weight", "model.layers.6.input_layernorm.weight": "llamaforcausallm__model__layers__6__input_layernorm__weight", "model.layers.6.post_attention_layernorm.weight": "llamaforcausallm__model__layers__6__post_attention_layernorm__weight", "model.layers.7.self_attn.q_proj.weight": "llamaforcausallm__model__layers__7__self_attn__q_proj__weight", "model.layers.7.self_attn.k_proj.weight": "llamaforcausallm__model__layers__7__self_attn__k_proj__weight", "model.layers.7.self_attn.v_proj.weight": 
"llamaforcausallm__model__layers__7__self_attn__v_proj__weight", "model.layers.7.self_attn.o_proj.weight": "llamaforcausallm__model__layers__7__self_attn__o_proj__weight", "model.layers.7.mlp.gate_proj.weight": "llamaforcausallm__model__layers__7__mlp__gate_proj__weight", "model.layers.7.mlp.up_proj.weight": "llamaforcausallm__model__layers__7__mlp__up_proj__weight", "model.layers.7.mlp.down_proj.weight": "llamaforcausallm__model__layers__7__mlp__down_proj__weight", "model.layers.7.input_layernorm.weight": "llamaforcausallm__model__layers__7__input_layernorm__weight", "model.layers.7.post_attention_layernorm.weight": "llamaforcausallm__model__layers__7__post_attention_layernorm__weight", "model.layers.8.self_attn.q_proj.weight": "llamaforcausallm__model__layers__8__self_attn__q_proj__weight", "model.layers.8.self_attn.k_proj.weight": "llamaforcausallm__model__layers__8__self_attn__k_proj__weight", "model.layers.8.self_attn.v_proj.weight": "llamaforcausallm__model__layers__8__self_attn__v_proj__weight", "model.layers.8.self_attn.o_proj.weight": "llamaforcausallm__model__layers__8__self_attn__o_proj__weight", "model.layers.8.mlp.gate_proj.weight": "llamaforcausallm__model__layers__8__mlp__gate_proj__weight", "model.layers.8.mlp.up_proj.weight": "llamaforcausallm__model__layers__8__mlp__up_proj__weight", "model.layers.8.mlp.down_proj.weight": "llamaforcausallm__model__layers__8__mlp__down_proj__weight", "model.layers.8.input_layernorm.weight": "llamaforcausallm__model__layers__8__input_layernorm__weight", "model.layers.8.post_attention_layernorm.weight": "llamaforcausallm__model__layers__8__post_attention_layernorm__weight", "model.layers.9.self_attn.q_proj.weight": "llamaforcausallm__model__layers__9__self_attn__q_proj__weight", "model.layers.9.self_attn.k_proj.weight": "llamaforcausallm__model__layers__9__self_attn__k_proj__weight", "model.layers.9.self_attn.v_proj.weight": "llamaforcausallm__model__layers__9__self_attn__v_proj__weight", "model.layers.9.self_attn.o_proj.weight": "llamaforcausallm__model__layers__9__self_attn__o_proj__weight", "model.layers.9.mlp.gate_proj.weight": "llamaforcausallm__model__layers__9__mlp__gate_proj__weight", "model.layers.9.mlp.up_proj.weight": "llamaforcausallm__model__layers__9__mlp__up_proj__weight", "model.layers.9.mlp.down_proj.weight": "llamaforcausallm__model__layers__9__mlp__down_proj__weight", "model.layers.9.input_layernorm.weight": "llamaforcausallm__model__layers__9__input_layernorm__weight", "model.layers.9.post_attention_layernorm.weight": "llamaforcausallm__model__layers__9__post_attention_layernorm__weight", "model.layers.10.self_attn.q_proj.weight": "llamaforcausallm__model__layers__10__self_attn__q_proj__weight", "model.layers.10.self_attn.k_proj.weight": "llamaforcausallm__model__layers__10__self_attn__k_proj__weight", "model.layers.10.self_attn.v_proj.weight": "llamaforcausallm__model__layers__10__self_attn__v_proj__weight", "model.layers.10.self_attn.o_proj.weight": "llamaforcausallm__model__layers__10__self_attn__o_proj__weight", "model.layers.10.mlp.gate_proj.weight": "llamaforcausallm__model__layers__10__mlp__gate_proj__weight", "model.layers.10.mlp.up_proj.weight": "llamaforcausallm__model__layers__10__mlp__up_proj__weight", "model.layers.10.mlp.down_proj.weight": "llamaforcausallm__model__layers__10__mlp__down_proj__weight", "model.layers.10.input_layernorm.weight": "llamaforcausallm__model__layers__10__input_layernorm__weight", "model.layers.10.post_attention_layernorm.weight": 
"llamaforcausallm__model__layers__10__post_attention_layernorm__weight", "model.layers.11.self_attn.q_proj.weight": "llamaforcausallm__model__layers__11__self_attn__q_proj__weight", "model.layers.11.self_attn.k_proj.weight": "llamaforcausallm__model__layers__11__self_attn__k_proj__weight", "model.layers.11.self_attn.v_proj.weight": "llamaforcausallm__model__layers__11__self_attn__v_proj__weight", "model.layers.11.self_attn.o_proj.weight": "llamaforcausallm__model__layers__11__self_attn__o_proj__weight", "model.layers.11.mlp.gate_proj.weight": "llamaforcausallm__model__layers__11__mlp__gate_proj__weight", "model.layers.11.mlp.up_proj.weight": "llamaforcausallm__model__layers__11__mlp__up_proj__weight", "model.layers.11.mlp.down_proj.weight": "llamaforcausallm__model__layers__11__mlp__down_proj__weight", "model.layers.11.input_layernorm.weight": "llamaforcausallm__model__layers__11__input_layernorm__weight", "model.layers.11.post_attention_layernorm.weight": "llamaforcausallm__model__layers__11__post_attention_layernorm__weight", "model.layers.12.self_attn.q_proj.weight": "llamaforcausallm__model__layers__12__self_attn__q_proj__weight", "model.layers.12.self_attn.k_proj.weight": "llamaforcausallm__model__layers__12__self_attn__k_proj__weight", "model.layers.12.self_attn.v_proj.weight": "llamaforcausallm__model__layers__12__self_attn__v_proj__weight", "model.layers.12.self_attn.o_proj.weight": "llamaforcausallm__model__layers__12__self_attn__o_proj__weight", "model.layers.12.mlp.gate_proj.weight": "llamaforcausallm__model__layers__12__mlp__gate_proj__weight", "model.layers.12.mlp.up_proj.weight": "llamaforcausallm__model__layers__12__mlp__up_proj__weight", "model.layers.12.mlp.down_proj.weight": "llamaforcausallm__model__layers__12__mlp__down_proj__weight", "model.layers.12.input_layernorm.weight": "llamaforcausallm__model__layers__12__input_layernorm__weight", "model.layers.12.post_attention_layernorm.weight": "llamaforcausallm__model__layers__12__post_attention_layernorm__weight", "model.layers.13.self_attn.q_proj.weight": "llamaforcausallm__model__layers__13__self_attn__q_proj__weight", "model.layers.13.self_attn.k_proj.weight": "llamaforcausallm__model__layers__13__self_attn__k_proj__weight", "model.layers.13.self_attn.v_proj.weight": "llamaforcausallm__model__layers__13__self_attn__v_proj__weight", "model.layers.13.self_attn.o_proj.weight": "llamaforcausallm__model__layers__13__self_attn__o_proj__weight", "model.layers.13.mlp.gate_proj.weight": "llamaforcausallm__model__layers__13__mlp__gate_proj__weight", "model.layers.13.mlp.up_proj.weight": "llamaforcausallm__model__layers__13__mlp__up_proj__weight", "model.layers.13.mlp.down_proj.weight": "llamaforcausallm__model__layers__13__mlp__down_proj__weight", "model.layers.13.input_layernorm.weight": "llamaforcausallm__model__layers__13__input_layernorm__weight", "model.layers.13.post_attention_layernorm.weight": "llamaforcausallm__model__layers__13__post_attention_layernorm__weight", "model.layers.14.self_attn.q_proj.weight": "llamaforcausallm__model__layers__14__self_attn__q_proj__weight", "model.layers.14.self_attn.k_proj.weight": "llamaforcausallm__model__layers__14__self_attn__k_proj__weight", "model.layers.14.self_attn.v_proj.weight": "llamaforcausallm__model__layers__14__self_attn__v_proj__weight", "model.layers.14.self_attn.o_proj.weight": "llamaforcausallm__model__layers__14__self_attn__o_proj__weight", "model.layers.14.mlp.gate_proj.weight": "llamaforcausallm__model__layers__14__mlp__gate_proj__weight", 
"model.layers.14.mlp.up_proj.weight": "llamaforcausallm__model__layers__14__mlp__up_proj__weight", "model.layers.14.mlp.down_proj.weight": "llamaforcausallm__model__layers__14__mlp__down_proj__weight", "model.layers.14.input_layernorm.weight": "llamaforcausallm__model__layers__14__input_layernorm__weight", "model.layers.14.post_attention_layernorm.weight": "llamaforcausallm__model__layers__14__post_attention_layernorm__weight", "model.layers.15.self_attn.q_proj.weight": "llamaforcausallm__model__layers__15__self_attn__q_proj__weight", "model.layers.15.self_attn.k_proj.weight": "llamaforcausallm__model__layers__15__self_attn__k_proj__weight", "model.layers.15.self_attn.v_proj.weight": "llamaforcausallm__model__layers__15__self_attn__v_proj__weight", "model.layers.15.self_attn.o_proj.weight": "llamaforcausallm__model__layers__15__self_attn__o_proj__weight", "model.layers.15.mlp.gate_proj.weight": "llamaforcausallm__model__layers__15__mlp__gate_proj__weight", "model.layers.15.mlp.up_proj.weight": "llamaforcausallm__model__layers__15__mlp__up_proj__weight", "model.layers.15.mlp.down_proj.weight": "llamaforcausallm__model__layers__15__mlp__down_proj__weight", "model.layers.15.input_layernorm.weight": "llamaforcausallm__model__layers__15__input_layernorm__weight", "model.layers.15.post_attention_layernorm.weight": "llamaforcausallm__model__layers__15__post_attention_layernorm__weight", "model.layers.16.self_attn.q_proj.weight": "llamaforcausallm__model__layers__16__self_attn__q_proj__weight", "model.layers.16.self_attn.k_proj.weight": "llamaforcausallm__model__layers__16__self_attn__k_proj__weight", "model.layers.16.self_attn.v_proj.weight": "llamaforcausallm__model__layers__16__self_attn__v_proj__weight", "model.layers.16.self_attn.o_proj.weight": "llamaforcausallm__model__layers__16__self_attn__o_proj__weight", "model.layers.16.mlp.gate_proj.weight": "llamaforcausallm__model__layers__16__mlp__gate_proj__weight", "model.layers.16.mlp.up_proj.weight": "llamaforcausallm__model__layers__16__mlp__up_proj__weight", "model.layers.16.mlp.down_proj.weight": "llamaforcausallm__model__layers__16__mlp__down_proj__weight", "model.layers.16.input_layernorm.weight": "llamaforcausallm__model__layers__16__input_layernorm__weight", "model.layers.16.post_attention_layernorm.weight": "llamaforcausallm__model__layers__16__post_attention_layernorm__weight", "model.layers.17.self_attn.q_proj.weight": "llamaforcausallm__model__layers__17__self_attn__q_proj__weight", "model.layers.17.self_attn.k_proj.weight": "llamaforcausallm__model__layers__17__self_attn__k_proj__weight", "model.layers.17.self_attn.v_proj.weight": "llamaforcausallm__model__layers__17__self_attn__v_proj__weight", "model.layers.17.self_attn.o_proj.weight": "llamaforcausallm__model__layers__17__self_attn__o_proj__weight", "model.layers.17.mlp.gate_proj.weight": "llamaforcausallm__model__layers__17__mlp__gate_proj__weight", "model.layers.17.mlp.up_proj.weight": "llamaforcausallm__model__layers__17__mlp__up_proj__weight", "model.layers.17.mlp.down_proj.weight": "llamaforcausallm__model__layers__17__mlp__down_proj__weight", "model.layers.17.input_layernorm.weight": "llamaforcausallm__model__layers__17__input_layernorm__weight", "model.layers.17.post_attention_layernorm.weight": "llamaforcausallm__model__layers__17__post_attention_layernorm__weight", "model.layers.18.self_attn.q_proj.weight": "llamaforcausallm__model__layers__18__self_attn__q_proj__weight", "model.layers.18.self_attn.k_proj.weight": 
"llamaforcausallm__model__layers__18__self_attn__k_proj__weight", "model.layers.18.self_attn.v_proj.weight": "llamaforcausallm__model__layers__18__self_attn__v_proj__weight", "model.layers.18.self_attn.o_proj.weight": "llamaforcausallm__model__layers__18__self_attn__o_proj__weight", "model.layers.18.mlp.gate_proj.weight": "llamaforcausallm__model__layers__18__mlp__gate_proj__weight", "model.layers.18.mlp.up_proj.weight": "llamaforcausallm__model__layers__18__mlp__up_proj__weight", "model.layers.18.mlp.down_proj.weight": "llamaforcausallm__model__layers__18__mlp__down_proj__weight", "model.layers.18.input_layernorm.weight": "llamaforcausallm__model__layers__18__input_layernorm__weight", "model.layers.18.post_attention_layernorm.weight": "llamaforcausallm__model__layers__18__post_attention_layernorm__weight", "model.layers.19.self_attn.q_proj.weight": "llamaforcausallm__model__layers__19__self_attn__q_proj__weight", "model.layers.19.self_attn.k_proj.weight": "llamaforcausallm__model__layers__19__self_attn__k_proj__weight", "model.layers.19.self_attn.v_proj.weight": "llamaforcausallm__model__layers__19__self_attn__v_proj__weight", "model.layers.19.self_attn.o_proj.weight": "llamaforcausallm__model__layers__19__self_attn__o_proj__weight", "model.layers.19.mlp.gate_proj.weight": "llamaforcausallm__model__layers__19__mlp__gate_proj__weight", "model.layers.19.mlp.up_proj.weight": "llamaforcausallm__model__layers__19__mlp__up_proj__weight", "model.layers.19.mlp.down_proj.weight": "llamaforcausallm__model__layers__19__mlp__down_proj__weight", "model.layers.19.input_layernorm.weight": "llamaforcausallm__model__layers__19__input_layernorm__weight", "model.layers.19.post_attention_layernorm.weight": "llamaforcausallm__model__layers__19__post_attention_layernorm__weight", "model.layers.20.self_attn.q_proj.weight": "llamaforcausallm__model__layers__20__self_attn__q_proj__weight", "model.layers.20.self_attn.k_proj.weight": "llamaforcausallm__model__layers__20__self_attn__k_proj__weight", "model.layers.20.self_attn.v_proj.weight": "llamaforcausallm__model__layers__20__self_attn__v_proj__weight", "model.layers.20.self_attn.o_proj.weight": "llamaforcausallm__model__layers__20__self_attn__o_proj__weight", "model.layers.20.mlp.gate_proj.weight": "llamaforcausallm__model__layers__20__mlp__gate_proj__weight", "model.layers.20.mlp.up_proj.weight": "llamaforcausallm__model__layers__20__mlp__up_proj__weight", "model.layers.20.mlp.down_proj.weight": "llamaforcausallm__model__layers__20__mlp__down_proj__weight", "model.layers.20.input_layernorm.weight": "llamaforcausallm__model__layers__20__input_layernorm__weight", "model.layers.20.post_attention_layernorm.weight": "llamaforcausallm__model__layers__20__post_attention_layernorm__weight", "model.layers.21.self_attn.q_proj.weight": "llamaforcausallm__model__layers__21__self_attn__q_proj__weight", "model.layers.21.self_attn.k_proj.weight": "llamaforcausallm__model__layers__21__self_attn__k_proj__weight", "model.layers.21.self_attn.v_proj.weight": "llamaforcausallm__model__layers__21__self_attn__v_proj__weight", "model.layers.21.self_attn.o_proj.weight": "llamaforcausallm__model__layers__21__self_attn__o_proj__weight", "model.layers.21.mlp.gate_proj.weight": "llamaforcausallm__model__layers__21__mlp__gate_proj__weight", "model.layers.21.mlp.up_proj.weight": "llamaforcausallm__model__layers__21__mlp__up_proj__weight", "model.layers.21.mlp.down_proj.weight": "llamaforcausallm__model__layers__21__mlp__down_proj__weight", "model.layers.21.input_layernorm.weight": 
"llamaforcausallm__model__layers__21__input_layernorm__weight", "model.layers.21.post_attention_layernorm.weight": "llamaforcausallm__model__layers__21__post_attention_layernorm__weight", "model.layers.22.self_attn.q_proj.weight": "llamaforcausallm__model__layers__22__self_attn__q_proj__weight", "model.layers.22.self_attn.k_proj.weight": "llamaforcausallm__model__layers__22__self_attn__k_proj__weight", "model.layers.22.self_attn.v_proj.weight": "llamaforcausallm__model__layers__22__self_attn__v_proj__weight", "model.layers.22.self_attn.o_proj.weight": "llamaforcausallm__model__layers__22__self_attn__o_proj__weight", "model.layers.22.mlp.gate_proj.weight": "llamaforcausallm__model__layers__22__mlp__gate_proj__weight", "model.layers.22.mlp.up_proj.weight": "llamaforcausallm__model__layers__22__mlp__up_proj__weight", "model.layers.22.mlp.down_proj.weight": "llamaforcausallm__model__layers__22__mlp__down_proj__weight", "model.layers.22.input_layernorm.weight": "llamaforcausallm__model__layers__22__input_layernorm__weight", "model.layers.22.post_attention_layernorm.weight": "llamaforcausallm__model__layers__22__post_attention_layernorm__weight", "model.layers.23.self_attn.q_proj.weight": "llamaforcausallm__model__layers__23__self_attn__q_proj__weight", "model.layers.23.self_attn.k_proj.weight": "llamaforcausallm__model__layers__23__self_attn__k_proj__weight", "model.layers.23.self_attn.v_proj.weight": "llamaforcausallm__model__layers__23__self_attn__v_proj__weight", "model.layers.23.self_attn.o_proj.weight": "llamaforcausallm__model__layers__23__self_attn__o_proj__weight", "model.layers.23.mlp.gate_proj.weight": "llamaforcausallm__model__layers__23__mlp__gate_proj__weight", "model.layers.23.mlp.up_proj.weight": "llamaforcausallm__model__layers__23__mlp__up_proj__weight", "model.layers.23.mlp.down_proj.weight": "llamaforcausallm__model__layers__23__mlp__down_proj__weight", "model.layers.23.input_layernorm.weight": "llamaforcausallm__model__layers__23__input_layernorm__weight", "model.layers.23.post_attention_layernorm.weight": "llamaforcausallm__model__layers__23__post_attention_layernorm__weight", "model.layers.24.self_attn.q_proj.weight": "llamaforcausallm__model__layers__24__self_attn__q_proj__weight", "model.layers.24.self_attn.k_proj.weight": "llamaforcausallm__model__layers__24__self_attn__k_proj__weight", "model.layers.24.self_attn.v_proj.weight": "llamaforcausallm__model__layers__24__self_attn__v_proj__weight", "model.layers.24.self_attn.o_proj.weight": "llamaforcausallm__model__layers__24__self_attn__o_proj__weight", "model.layers.24.mlp.gate_proj.weight": "llamaforcausallm__model__layers__24__mlp__gate_proj__weight", "model.layers.24.mlp.up_proj.weight": "llamaforcausallm__model__layers__24__mlp__up_proj__weight", "model.layers.24.mlp.down_proj.weight": "llamaforcausallm__model__layers__24__mlp__down_proj__weight", "model.layers.24.input_layernorm.weight": "llamaforcausallm__model__layers__24__input_layernorm__weight", "model.layers.24.post_attention_layernorm.weight": "llamaforcausallm__model__layers__24__post_attention_layernorm__weight", "model.layers.25.self_attn.q_proj.weight": "llamaforcausallm__model__layers__25__self_attn__q_proj__weight", "model.layers.25.self_attn.k_proj.weight": "llamaforcausallm__model__layers__25__self_attn__k_proj__weight", "model.layers.25.self_attn.v_proj.weight": "llamaforcausallm__model__layers__25__self_attn__v_proj__weight", "model.layers.25.self_attn.o_proj.weight": "llamaforcausallm__model__layers__25__self_attn__o_proj__weight", 
"model.layers.25.mlp.gate_proj.weight": "llamaforcausallm__model__layers__25__mlp__gate_proj__weight", "model.layers.25.mlp.up_proj.weight": "llamaforcausallm__model__layers__25__mlp__up_proj__weight", "model.layers.25.mlp.down_proj.weight": "llamaforcausallm__model__layers__25__mlp__down_proj__weight", "model.layers.25.input_layernorm.weight": "llamaforcausallm__model__layers__25__input_layernorm__weight", "model.layers.25.post_attention_layernorm.weight": "llamaforcausallm__model__layers__25__post_attention_layernorm__weight", "model.layers.26.self_attn.q_proj.weight": "llamaforcausallm__model__layers__26__self_attn__q_proj__weight", "model.layers.26.self_attn.k_proj.weight": "llamaforcausallm__model__layers__26__self_attn__k_proj__weight", "model.layers.26.self_attn.v_proj.weight": "llamaforcausallm__model__layers__26__self_attn__v_proj__weight", "model.layers.26.self_attn.o_proj.weight": "llamaforcausallm__model__layers__26__self_attn__o_proj__weight", "model.layers.26.mlp.gate_proj.weight": "llamaforcausallm__model__layers__26__mlp__gate_proj__weight", "model.layers.26.mlp.up_proj.weight": "llamaforcausallm__model__layers__26__mlp__up_proj__weight", "model.layers.26.mlp.down_proj.weight": "llamaforcausallm__model__layers__26__mlp__down_proj__weight", "model.layers.26.input_layernorm.weight": "llamaforcausallm__model__layers__26__input_layernorm__weight", "model.layers.26.post_attention_layernorm.weight": "llamaforcausallm__model__layers__26__post_attention_layernorm__weight", "model.layers.27.self_attn.q_proj.weight": "llamaforcausallm__model__layers__27__self_attn__q_proj__weight", "model.layers.27.self_attn.k_proj.weight": "llamaforcausallm__model__layers__27__self_attn__k_proj__weight", "model.layers.27.self_attn.v_proj.weight": "llamaforcausallm__model__layers__27__self_attn__v_proj__weight", "model.layers.27.self_attn.o_proj.weight": "llamaforcausallm__model__layers__27__self_attn__o_proj__weight", "model.layers.27.mlp.gate_proj.weight": "llamaforcausallm__model__layers__27__mlp__gate_proj__weight", "model.layers.27.mlp.up_proj.weight": "llamaforcausallm__model__layers__27__mlp__up_proj__weight", "model.layers.27.mlp.down_proj.weight": "llamaforcausallm__model__layers__27__mlp__down_proj__weight", "model.layers.27.input_layernorm.weight": "llamaforcausallm__model__layers__27__input_layernorm__weight", "model.layers.27.post_attention_layernorm.weight": "llamaforcausallm__model__layers__27__post_attention_layernorm__weight", "model.layers.28.self_attn.q_proj.weight": "llamaforcausallm__model__layers__28__self_attn__q_proj__weight", "model.layers.28.self_attn.k_proj.weight": "llamaforcausallm__model__layers__28__self_attn__k_proj__weight", "model.layers.28.self_attn.v_proj.weight": "llamaforcausallm__model__layers__28__self_attn__v_proj__weight", "model.layers.28.self_attn.o_proj.weight": "llamaforcausallm__model__layers__28__self_attn__o_proj__weight", "model.layers.28.mlp.gate_proj.weight": "llamaforcausallm__model__layers__28__mlp__gate_proj__weight", "model.layers.28.mlp.up_proj.weight": "llamaforcausallm__model__layers__28__mlp__up_proj__weight", "model.layers.28.mlp.down_proj.weight": "llamaforcausallm__model__layers__28__mlp__down_proj__weight", "model.layers.28.input_layernorm.weight": "llamaforcausallm__model__layers__28__input_layernorm__weight", "model.layers.28.post_attention_layernorm.weight": "llamaforcausallm__model__layers__28__post_attention_layernorm__weight", "model.layers.29.self_attn.q_proj.weight": 
"llamaforcausallm__model__layers__29__self_attn__q_proj__weight", "model.layers.29.self_attn.k_proj.weight": "llamaforcausallm__model__layers__29__self_attn__k_proj__weight", "model.layers.29.self_attn.v_proj.weight": "llamaforcausallm__model__layers__29__self_attn__v_proj__weight", "model.layers.29.self_attn.o_proj.weight": "llamaforcausallm__model__layers__29__self_attn__o_proj__weight", "model.layers.29.mlp.gate_proj.weight": "llamaforcausallm__model__layers__29__mlp__gate_proj__weight", "model.layers.29.mlp.up_proj.weight": "llamaforcausallm__model__layers__29__mlp__up_proj__weight", "model.layers.29.mlp.down_proj.weight": "llamaforcausallm__model__layers__29__mlp__down_proj__weight", "model.layers.29.input_layernorm.weight": "llamaforcausallm__model__layers__29__input_layernorm__weight", "model.layers.29.post_attention_layernorm.weight": "llamaforcausallm__model__layers__29__post_attention_layernorm__weight", "model.layers.30.self_attn.q_proj.weight": "llamaforcausallm__model__layers__30__self_attn__q_proj__weight", "model.layers.30.self_attn.k_proj.weight": "llamaforcausallm__model__layers__30__self_attn__k_proj__weight", "model.layers.30.self_attn.v_proj.weight": "llamaforcausallm__model__layers__30__self_attn__v_proj__weight", "model.layers.30.self_attn.o_proj.weight": "llamaforcausallm__model__layers__30__self_attn__o_proj__weight", "model.layers.30.mlp.gate_proj.weight": "llamaforcausallm__model__layers__30__mlp__gate_proj__weight", "model.layers.30.mlp.up_proj.weight": "llamaforcausallm__model__layers__30__mlp__up_proj__weight", "model.layers.30.mlp.down_proj.weight": "llamaforcausallm__model__layers__30__mlp__down_proj__weight", "model.layers.30.input_layernorm.weight": "llamaforcausallm__model__layers__30__input_layernorm__weight", "model.layers.30.post_attention_layernorm.weight": "llamaforcausallm__model__layers__30__post_attention_layernorm__weight", "model.layers.31.self_attn.q_proj.weight": "llamaforcausallm__model__layers__31__self_attn__q_proj__weight", "model.layers.31.self_attn.k_proj.weight": "llamaforcausallm__model__layers__31__self_attn__k_proj__weight", "model.layers.31.self_attn.v_proj.weight": "llamaforcausallm__model__layers__31__self_attn__v_proj__weight", "model.layers.31.self_attn.o_proj.weight": "llamaforcausallm__model__layers__31__self_attn__o_proj__weight", "model.layers.31.mlp.gate_proj.weight": "llamaforcausallm__model__layers__31__mlp__gate_proj__weight", "model.layers.31.mlp.up_proj.weight": "llamaforcausallm__model__layers__31__mlp__up_proj__weight", "model.layers.31.mlp.down_proj.weight": "llamaforcausallm__model__layers__31__mlp__down_proj__weight", "model.layers.31.input_layernorm.weight": "llamaforcausallm__model__layers__31__input_layernorm__weight", "model.layers.31.post_attention_layernorm.weight": "llamaforcausallm__model__layers__31__post_attention_layernorm__weight", "model.layers.32.self_attn.q_proj.weight": "llamaforcausallm__model__layers__32__self_attn__q_proj__weight", "model.layers.32.self_attn.k_proj.weight": "llamaforcausallm__model__layers__32__self_attn__k_proj__weight", "model.layers.32.self_attn.v_proj.weight": "llamaforcausallm__model__layers__32__self_attn__v_proj__weight", "model.layers.32.self_attn.o_proj.weight": "llamaforcausallm__model__layers__32__self_attn__o_proj__weight", "model.layers.32.mlp.gate_proj.weight": "llamaforcausallm__model__layers__32__mlp__gate_proj__weight", "model.layers.32.mlp.up_proj.weight": "llamaforcausallm__model__layers__32__mlp__up_proj__weight", "model.layers.32.mlp.down_proj.weight": 
"llamaforcausallm__model__layers__32__mlp__down_proj__weight", "model.layers.32.input_layernorm.weight": "llamaforcausallm__model__layers__32__input_layernorm__weight", "model.layers.32.post_attention_layernorm.weight": "llamaforcausallm__model__layers__32__post_attention_layernorm__weight", "model.layers.33.self_attn.q_proj.weight": "llamaforcausallm__model__layers__33__self_attn__q_proj__weight", "model.layers.33.self_attn.k_proj.weight": "llamaforcausallm__model__layers__33__self_attn__k_proj__weight", "model.layers.33.self_attn.v_proj.weight": "llamaforcausallm__model__layers__33__self_attn__v_proj__weight", "model.layers.33.self_attn.o_proj.weight": "llamaforcausallm__model__layers__33__self_attn__o_proj__weight", "model.layers.33.mlp.gate_proj.weight": "llamaforcausallm__model__layers__33__mlp__gate_proj__weight", "model.layers.33.mlp.up_proj.weight": "llamaforcausallm__model__layers__33__mlp__up_proj__weight", "model.layers.33.mlp.down_proj.weight": "llamaforcausallm__model__layers__33__mlp__down_proj__weight", "model.layers.33.input_layernorm.weight": "llamaforcausallm__model__layers__33__input_layernorm__weight", "model.layers.33.post_attention_layernorm.weight": "llamaforcausallm__model__layers__33__post_attention_layernorm__weight", "model.layers.34.self_attn.q_proj.weight": "llamaforcausallm__model__layers__34__self_attn__q_proj__weight", "model.layers.34.self_attn.k_proj.weight": "llamaforcausallm__model__layers__34__self_attn__k_proj__weight", "model.layers.34.self_attn.v_proj.weight": "llamaforcausallm__model__layers__34__self_attn__v_proj__weight", "model.layers.34.self_attn.o_proj.weight": "llamaforcausallm__model__layers__34__self_attn__o_proj__weight", "model.layers.34.mlp.gate_proj.weight": "llamaforcausallm__model__layers__34__mlp__gate_proj__weight", "model.layers.34.mlp.up_proj.weight": "llamaforcausallm__model__layers__34__mlp__up_proj__weight", "model.layers.34.mlp.down_proj.weight": "llamaforcausallm__model__layers__34__mlp__down_proj__weight", "model.layers.34.input_layernorm.weight": "llamaforcausallm__model__layers__34__input_layernorm__weight", "model.layers.34.post_attention_layernorm.weight": "llamaforcausallm__model__layers__34__post_attention_layernorm__weight", "model.layers.35.self_attn.q_proj.weight": "llamaforcausallm__model__layers__35__self_attn__q_proj__weight", "model.layers.35.self_attn.k_proj.weight": "llamaforcausallm__model__layers__35__self_attn__k_proj__weight", "model.layers.35.self_attn.v_proj.weight": "llamaforcausallm__model__layers__35__self_attn__v_proj__weight", "model.layers.35.self_attn.o_proj.weight": "llamaforcausallm__model__layers__35__self_attn__o_proj__weight", "model.layers.35.mlp.gate_proj.weight": "llamaforcausallm__model__layers__35__mlp__gate_proj__weight", "model.layers.35.mlp.up_proj.weight": "llamaforcausallm__model__layers__35__mlp__up_proj__weight", "model.layers.35.mlp.down_proj.weight": "llamaforcausallm__model__layers__35__mlp__down_proj__weight", "model.layers.35.input_layernorm.weight": "llamaforcausallm__model__layers__35__input_layernorm__weight", "model.layers.35.post_attention_layernorm.weight": "llamaforcausallm__model__layers__35__post_attention_layernorm__weight", "model.layers.36.self_attn.q_proj.weight": "llamaforcausallm__model__layers__36__self_attn__q_proj__weight", "model.layers.36.self_attn.k_proj.weight": "llamaforcausallm__model__layers__36__self_attn__k_proj__weight", "model.layers.36.self_attn.v_proj.weight": "llamaforcausallm__model__layers__36__self_attn__v_proj__weight", 
"model.layers.36.self_attn.o_proj.weight": "llamaforcausallm__model__layers__36__self_attn__o_proj__weight", "model.layers.36.mlp.gate_proj.weight": "llamaforcausallm__model__layers__36__mlp__gate_proj__weight", "model.layers.36.mlp.up_proj.weight": "llamaforcausallm__model__layers__36__mlp__up_proj__weight", "model.layers.36.mlp.down_proj.weight": "llamaforcausallm__model__layers__36__mlp__down_proj__weight", "model.layers.36.input_layernorm.weight": "llamaforcausallm__model__layers__36__input_layernorm__weight", "model.layers.36.post_attention_layernorm.weight": "llamaforcausallm__model__layers__36__post_attention_layernorm__weight", "model.layers.37.self_attn.q_proj.weight": "llamaforcausallm__model__layers__37__self_attn__q_proj__weight", "model.layers.37.self_attn.k_proj.weight": "llamaforcausallm__model__layers__37__self_attn__k_proj__weight", "model.layers.37.self_attn.v_proj.weight": "llamaforcausallm__model__layers__37__self_attn__v_proj__weight", "model.layers.37.self_attn.o_proj.weight": "llamaforcausallm__model__layers__37__self_attn__o_proj__weight", "model.layers.37.mlp.gate_proj.weight": "llamaforcausallm__model__layers__37__mlp__gate_proj__weight", "model.layers.37.mlp.up_proj.weight": "llamaforcausallm__model__layers__37__mlp__up_proj__weight", "model.layers.37.mlp.down_proj.weight": "llamaforcausallm__model__layers__37__mlp__down_proj__weight", "model.layers.37.input_layernorm.weight": "llamaforcausallm__model__layers__37__input_layernorm__weight", "model.layers.37.post_attention_layernorm.weight": "llamaforcausallm__model__layers__37__post_attention_layernorm__weight", "model.layers.38.self_attn.q_proj.weight": "llamaforcausallm__model__layers__38__self_attn__q_proj__weight", "model.layers.38.self_attn.k_proj.weight": "llamaforcausallm__model__layers__38__self_attn__k_proj__weight", "model.layers.38.self_attn.v_proj.weight": "llamaforcausallm__model__layers__38__self_attn__v_proj__weight", "model.layers.38.self_attn.o_proj.weight": "llamaforcausallm__model__layers__38__self_attn__o_proj__weight", "model.layers.38.mlp.gate_proj.weight": "llamaforcausallm__model__layers__38__mlp__gate_proj__weight", "model.layers.38.mlp.up_proj.weight": "llamaforcausallm__model__layers__38__mlp__up_proj__weight", "model.layers.38.mlp.down_proj.weight": "llamaforcausallm__model__layers__38__mlp__down_proj__weight", "model.layers.38.input_layernorm.weight": "llamaforcausallm__model__layers__38__input_layernorm__weight", "model.layers.38.post_attention_layernorm.weight": "llamaforcausallm__model__layers__38__post_attention_layernorm__weight", "model.layers.39.self_attn.q_proj.weight": "llamaforcausallm__model__layers__39__self_attn__q_proj__weight", "model.layers.39.self_attn.k_proj.weight": "llamaforcausallm__model__layers__39__self_attn__k_proj__weight", "model.layers.39.self_attn.v_proj.weight": "llamaforcausallm__model__layers__39__self_attn__v_proj__weight", "model.layers.39.self_attn.o_proj.weight": "llamaforcausallm__model__layers__39__self_attn__o_proj__weight", "model.layers.39.mlp.gate_proj.weight": "llamaforcausallm__model__layers__39__mlp__gate_proj__weight", "model.layers.39.mlp.up_proj.weight": "llamaforcausallm__model__layers__39__mlp__up_proj__weight", "model.layers.39.mlp.down_proj.weight": "llamaforcausallm__model__layers__39__mlp__down_proj__weight", "model.layers.39.input_layernorm.weight": "llamaforcausallm__model__layers__39__input_layernorm__weight", "model.layers.39.post_attention_layernorm.weight": 
"llamaforcausallm__model__layers__39__post_attention_layernorm__weight", "model.layers.40.self_attn.q_proj.weight": "llamaforcausallm__model__layers__40__self_attn__q_proj__weight", "model.layers.40.self_attn.k_proj.weight": "llamaforcausallm__model__layers__40__self_attn__k_proj__weight", "model.layers.40.self_attn.v_proj.weight": "llamaforcausallm__model__layers__40__self_attn__v_proj__weight", "model.layers.40.self_attn.o_proj.weight": "llamaforcausallm__model__layers__40__self_attn__o_proj__weight", "model.layers.40.mlp.gate_proj.weight": "llamaforcausallm__model__layers__40__mlp__gate_proj__weight", "model.layers.40.mlp.up_proj.weight": "llamaforcausallm__model__layers__40__mlp__up_proj__weight", "model.layers.40.mlp.down_proj.weight": "llamaforcausallm__model__layers__40__mlp__down_proj__weight", "model.layers.40.input_layernorm.weight": "llamaforcausallm__model__layers__40__input_layernorm__weight", "model.layers.40.post_attention_layernorm.weight": "llamaforcausallm__model__layers__40__post_attention_layernorm__weight", "model.layers.41.self_attn.q_proj.weight": "llamaforcausallm__model__layers__41__self_attn__q_proj__weight", "model.layers.41.self_attn.k_proj.weight": "llamaforcausallm__model__layers__41__self_attn__k_proj__weight", "model.layers.41.self_attn.v_proj.weight": "llamaforcausallm__model__layers__41__self_attn__v_proj__weight", "model.layers.41.self_attn.o_proj.weight": "llamaforcausallm__model__layers__41__self_attn__o_proj__weight", "model.layers.41.mlp.gate_proj.weight": "llamaforcausallm__model__layers__41__mlp__gate_proj__weight", "model.layers.41.mlp.up_proj.weight": "llamaforcausallm__model__layers__41__mlp__up_proj__weight", "model.layers.41.mlp.down_proj.weight": "llamaforcausallm__model__layers__41__mlp__down_proj__weight", "model.layers.41.input_layernorm.weight": "llamaforcausallm__model__layers__41__input_layernorm__weight", "model.layers.41.post_attention_layernorm.weight": "llamaforcausallm__model__layers__41__post_attention_layernorm__weight", "model.layers.42.self_attn.q_proj.weight": "llamaforcausallm__model__layers__42__self_attn__q_proj__weight", "model.layers.42.self_attn.k_proj.weight": "llamaforcausallm__model__layers__42__self_attn__k_proj__weight", "model.layers.42.self_attn.v_proj.weight": "llamaforcausallm__model__layers__42__self_attn__v_proj__weight", "model.layers.42.self_attn.o_proj.weight": "llamaforcausallm__model__layers__42__self_attn__o_proj__weight", "model.layers.42.mlp.gate_proj.weight": "llamaforcausallm__model__layers__42__mlp__gate_proj__weight", "model.layers.42.mlp.up_proj.weight": "llamaforcausallm__model__layers__42__mlp__up_proj__weight", "model.layers.42.mlp.down_proj.weight": "llamaforcausallm__model__layers__42__mlp__down_proj__weight", "model.layers.42.input_layernorm.weight": "llamaforcausallm__model__layers__42__input_layernorm__weight", "model.layers.42.post_attention_layernorm.weight": "llamaforcausallm__model__layers__42__post_attention_layernorm__weight", "model.layers.43.self_attn.q_proj.weight": "llamaforcausallm__model__layers__43__self_attn__q_proj__weight", "model.layers.43.self_attn.k_proj.weight": "llamaforcausallm__model__layers__43__self_attn__k_proj__weight", "model.layers.43.self_attn.v_proj.weight": "llamaforcausallm__model__layers__43__self_attn__v_proj__weight", "model.layers.43.self_attn.o_proj.weight": "llamaforcausallm__model__layers__43__self_attn__o_proj__weight", "model.layers.43.mlp.gate_proj.weight": "llamaforcausallm__model__layers__43__mlp__gate_proj__weight", 
"model.layers.43.mlp.up_proj.weight": "llamaforcausallm__model__layers__43__mlp__up_proj__weight", "model.layers.43.mlp.down_proj.weight": "llamaforcausallm__model__layers__43__mlp__down_proj__weight", "model.layers.43.input_layernorm.weight": "llamaforcausallm__model__layers__43__input_layernorm__weight", "model.layers.43.post_attention_layernorm.weight": "llamaforcausallm__model__layers__43__post_attention_layernorm__weight", "model.layers.44.self_attn.q_proj.weight": "llamaforcausallm__model__layers__44__self_attn__q_proj__weight", "model.layers.44.self_attn.k_proj.weight": "llamaforcausallm__model__layers__44__self_attn__k_proj__weight", "model.layers.44.self_attn.v_proj.weight": "llamaforcausallm__model__layers__44__self_attn__v_proj__weight", "model.layers.44.self_attn.o_proj.weight": "llamaforcausallm__model__layers__44__self_attn__o_proj__weight", "model.layers.44.mlp.gate_proj.weight": "llamaforcausallm__model__layers__44__mlp__gate_proj__weight", "model.layers.44.mlp.up_proj.weight": "llamaforcausallm__model__layers__44__mlp__up_proj__weight", "model.layers.44.mlp.down_proj.weight": "llamaforcausallm__model__layers__44__mlp__down_proj__weight", "model.layers.44.input_layernorm.weight": "llamaforcausallm__model__layers__44__input_layernorm__weight", "model.layers.44.post_attention_layernorm.weight": "llamaforcausallm__model__layers__44__post_attention_layernorm__weight", "model.layers.45.self_attn.q_proj.weight": "llamaforcausallm__model__layers__45__self_attn__q_proj__weight", "model.layers.45.self_attn.k_proj.weight": "llamaforcausallm__model__layers__45__self_attn__k_proj__weight", "model.layers.45.self_attn.v_proj.weight": "llamaforcausallm__model__layers__45__self_attn__v_proj__weight", "model.layers.45.self_attn.o_proj.weight": "llamaforcausallm__model__layers__45__self_attn__o_proj__weight", "model.layers.45.mlp.gate_proj.weight": "llamaforcausallm__model__layers__45__mlp__gate_proj__weight", "model.layers.45.mlp.up_proj.weight": "llamaforcausallm__model__layers__45__mlp__up_proj__weight", "model.layers.45.mlp.down_proj.weight": "llamaforcausallm__model__layers__45__mlp__down_proj__weight", "model.layers.45.input_layernorm.weight": "llamaforcausallm__model__layers__45__input_layernorm__weight", "model.layers.45.post_attention_layernorm.weight": "llamaforcausallm__model__layers__45__post_attention_layernorm__weight", "model.layers.46.self_attn.q_proj.weight": "llamaforcausallm__model__layers__46__self_attn__q_proj__weight", "model.layers.46.self_attn.k_proj.weight": "llamaforcausallm__model__layers__46__self_attn__k_proj__weight", "model.layers.46.self_attn.v_proj.weight": "llamaforcausallm__model__layers__46__self_attn__v_proj__weight", "model.layers.46.self_attn.o_proj.weight": "llamaforcausallm__model__layers__46__self_attn__o_proj__weight", "model.layers.46.mlp.gate_proj.weight": "llamaforcausallm__model__layers__46__mlp__gate_proj__weight", "model.layers.46.mlp.up_proj.weight": "llamaforcausallm__model__layers__46__mlp__up_proj__weight", "model.layers.46.mlp.down_proj.weight": "llamaforcausallm__model__layers__46__mlp__down_proj__weight", "model.layers.46.input_layernorm.weight": "llamaforcausallm__model__layers__46__input_layernorm__weight", "model.layers.46.post_attention_layernorm.weight": "llamaforcausallm__model__layers__46__post_attention_layernorm__weight", "model.layers.47.self_attn.q_proj.weight": "llamaforcausallm__model__layers__47__self_attn__q_proj__weight", "model.layers.47.self_attn.k_proj.weight": 
"llamaforcausallm__model__layers__47__self_attn__k_proj__weight", "model.layers.47.self_attn.v_proj.weight": "llamaforcausallm__model__layers__47__self_attn__v_proj__weight", "model.layers.47.self_attn.o_proj.weight": "llamaforcausallm__model__layers__47__self_attn__o_proj__weight", "model.layers.47.mlp.gate_proj.weight": "llamaforcausallm__model__layers__47__mlp__gate_proj__weight", "model.layers.47.mlp.up_proj.weight": "llamaforcausallm__model__layers__47__mlp__up_proj__weight", "model.layers.47.mlp.down_proj.weight": "llamaforcausallm__model__layers__47__mlp__down_proj__weight", "model.layers.47.input_layernorm.weight": "llamaforcausallm__model__layers__47__input_layernorm__weight", "model.layers.47.post_attention_layernorm.weight": "llamaforcausallm__model__layers__47__post_attention_layernorm__weight", "model.layers.48.self_attn.q_proj.weight": "llamaforcausallm__model__layers__48__self_attn__q_proj__weight", "model.layers.48.self_attn.k_proj.weight": "llamaforcausallm__model__layers__48__self_attn__k_proj__weight", "model.layers.48.self_attn.v_proj.weight": "llamaforcausallm__model__layers__48__self_attn__v_proj__weight", "model.layers.48.self_attn.o_proj.weight": "llamaforcausallm__model__layers__48__self_attn__o_proj__weight", "model.layers.48.mlp.gate_proj.weight": "llamaforcausallm__model__layers__48__mlp__gate_proj__weight", "model.layers.48.mlp.up_proj.weight": "llamaforcausallm__model__layers__48__mlp__up_proj__weight", "model.layers.48.mlp.down_proj.weight": "llamaforcausallm__model__layers__48__mlp__down_proj__weight", "model.layers.48.input_layernorm.weight": "llamaforcausallm__model__layers__48__input_layernorm__weight", "model.layers.48.post_attention_layernorm.weight": "llamaforcausallm__model__layers__48__post_attention_layernorm__weight", "model.layers.49.self_attn.q_proj.weight": "llamaforcausallm__model__layers__49__self_attn__q_proj__weight", "model.layers.49.self_attn.k_proj.weight": "llamaforcausallm__model__layers__49__self_attn__k_proj__weight", "model.layers.49.self_attn.v_proj.weight": "llamaforcausallm__model__layers__49__self_attn__v_proj__weight", "model.layers.49.self_attn.o_proj.weight": "llamaforcausallm__model__layers__49__self_attn__o_proj__weight", "model.layers.49.mlp.gate_proj.weight": "llamaforcausallm__model__layers__49__mlp__gate_proj__weight", "model.layers.49.mlp.up_proj.weight": "llamaforcausallm__model__layers__49__mlp__up_proj__weight", "model.layers.49.mlp.down_proj.weight": "llamaforcausallm__model__layers__49__mlp__down_proj__weight", "model.layers.49.input_layernorm.weight": "llamaforcausallm__model__layers__49__input_layernorm__weight", "model.layers.49.post_attention_layernorm.weight": "llamaforcausallm__model__layers__49__post_attention_layernorm__weight", "model.layers.50.self_attn.q_proj.weight": "llamaforcausallm__model__layers__50__self_attn__q_proj__weight", "model.layers.50.self_attn.k_proj.weight": "llamaforcausallm__model__layers__50__self_attn__k_proj__weight", "model.layers.50.self_attn.v_proj.weight": "llamaforcausallm__model__layers__50__self_attn__v_proj__weight", "model.layers.50.self_attn.o_proj.weight": "llamaforcausallm__model__layers__50__self_attn__o_proj__weight", "model.layers.50.mlp.gate_proj.weight": "llamaforcausallm__model__layers__50__mlp__gate_proj__weight", "model.layers.50.mlp.up_proj.weight": "llamaforcausallm__model__layers__50__mlp__up_proj__weight", "model.layers.50.mlp.down_proj.weight": "llamaforcausallm__model__layers__50__mlp__down_proj__weight", "model.layers.50.input_layernorm.weight": 
"llamaforcausallm__model__layers__50__input_layernorm__weight", "model.layers.50.post_attention_layernorm.weight": "llamaforcausallm__model__layers__50__post_attention_layernorm__weight", "model.layers.51.self_attn.q_proj.weight": "llamaforcausallm__model__layers__51__self_attn__q_proj__weight", "model.layers.51.self_attn.k_proj.weight": "llamaforcausallm__model__layers__51__self_attn__k_proj__weight", "model.layers.51.self_attn.v_proj.weight": "llamaforcausallm__model__layers__51__self_attn__v_proj__weight", "model.layers.51.self_attn.o_proj.weight": "llamaforcausallm__model__layers__51__self_attn__o_proj__weight", "model.layers.51.mlp.gate_proj.weight": "llamaforcausallm__model__layers__51__mlp__gate_proj__weight", "model.layers.51.mlp.up_proj.weight": "llamaforcausallm__model__layers__51__mlp__up_proj__weight", "model.layers.51.mlp.down_proj.weight": "llamaforcausallm__model__layers__51__mlp__down_proj__weight", "model.layers.51.input_layernorm.weight": "llamaforcausallm__model__layers__51__input_layernorm__weight", "model.layers.51.post_attention_layernorm.weight": "llamaforcausallm__model__layers__51__post_attention_layernorm__weight", "model.layers.52.self_attn.q_proj.weight": "llamaforcausallm__model__layers__52__self_attn__q_proj__weight", "model.layers.52.self_attn.k_proj.weight": "llamaforcausallm__model__layers__52__self_attn__k_proj__weight", "model.layers.52.self_attn.v_proj.weight": "llamaforcausallm__model__layers__52__self_attn__v_proj__weight", "model.layers.52.self_attn.o_proj.weight": "llamaforcausallm__model__layers__52__self_attn__o_proj__weight", "model.layers.52.mlp.gate_proj.weight": "llamaforcausallm__model__layers__52__mlp__gate_proj__weight", "model.layers.52.mlp.up_proj.weight": "llamaforcausallm__model__layers__52__mlp__up_proj__weight", "model.layers.52.mlp.down_proj.weight": "llamaforcausallm__model__layers__52__mlp__down_proj__weight", "model.layers.52.input_layernorm.weight": "llamaforcausallm__model__layers__52__input_layernorm__weight", "model.layers.52.post_attention_layernorm.weight": "llamaforcausallm__model__layers__52__post_attention_layernorm__weight", "model.layers.53.self_attn.q_proj.weight": "llamaforcausallm__model__layers__53__self_attn__q_proj__weight", "model.layers.53.self_attn.k_proj.weight": "llamaforcausallm__model__layers__53__self_attn__k_proj__weight", "model.layers.53.self_attn.v_proj.weight": "llamaforcausallm__model__layers__53__self_attn__v_proj__weight", "model.layers.53.self_attn.o_proj.weight": "llamaforcausallm__model__layers__53__self_attn__o_proj__weight", "model.layers.53.mlp.gate_proj.weight": "llamaforcausallm__model__layers__53__mlp__gate_proj__weight", "model.layers.53.mlp.up_proj.weight": "llamaforcausallm__model__layers__53__mlp__up_proj__weight", "model.layers.53.mlp.down_proj.weight": "llamaforcausallm__model__layers__53__mlp__down_proj__weight", "model.layers.53.input_layernorm.weight": "llamaforcausallm__model__layers__53__input_layernorm__weight", "model.layers.53.post_attention_layernorm.weight": "llamaforcausallm__model__layers__53__post_attention_layernorm__weight", "model.layers.54.self_attn.q_proj.weight": "llamaforcausallm__model__layers__54__self_attn__q_proj__weight", "model.layers.54.self_attn.k_proj.weight": "llamaforcausallm__model__layers__54__self_attn__k_proj__weight", "model.layers.54.self_attn.v_proj.weight": "llamaforcausallm__model__layers__54__self_attn__v_proj__weight", "model.layers.54.self_attn.o_proj.weight": "llamaforcausallm__model__layers__54__self_attn__o_proj__weight", 
"model.layers.54.mlp.gate_proj.weight": "llamaforcausallm__model__layers__54__mlp__gate_proj__weight", "model.layers.54.mlp.up_proj.weight": "llamaforcausallm__model__layers__54__mlp__up_proj__weight", "model.layers.54.mlp.down_proj.weight": "llamaforcausallm__model__layers__54__mlp__down_proj__weight", "model.layers.54.input_layernorm.weight": "llamaforcausallm__model__layers__54__input_layernorm__weight", "model.layers.54.post_attention_layernorm.weight": "llamaforcausallm__model__layers__54__post_attention_layernorm__weight", "model.layers.55.self_attn.q_proj.weight": "llamaforcausallm__model__layers__55__self_attn__q_proj__weight", "model.layers.55.self_attn.k_proj.weight": "llamaforcausallm__model__layers__55__self_attn__k_proj__weight", "model.layers.55.self_attn.v_proj.weight": "llamaforcausallm__model__layers__55__self_attn__v_proj__weight", "model.layers.55.self_attn.o_proj.weight": "llamaforcausallm__model__layers__55__self_attn__o_proj__weight", "model.layers.55.mlp.gate_proj.weight": "llamaforcausallm__model__layers__55__mlp__gate_proj__weight", "model.layers.55.mlp.up_proj.weight": "llamaforcausallm__model__layers__55__mlp__up_proj__weight", "model.layers.55.mlp.down_proj.weight": "llamaforcausallm__model__layers__55__mlp__down_proj__weight", "model.layers.55.input_layernorm.weight": "llamaforcausallm__model__layers__55__input_layernorm__weight", "model.layers.55.post_attention_layernorm.weight": "llamaforcausallm__model__layers__55__post_attention_layernorm__weight", "model.layers.56.self_attn.q_proj.weight": "llamaforcausallm__model__layers__56__self_attn__q_proj__weight", "model.layers.56.self_attn.k_proj.weight": "llamaforcausallm__model__layers__56__self_attn__k_proj__weight", "model.layers.56.self_attn.v_proj.weight": "llamaforcausallm__model__layers__56__self_attn__v_proj__weight", "model.layers.56.self_attn.o_proj.weight": "llamaforcausallm__model__layers__56__self_attn__o_proj__weight", "model.layers.56.mlp.gate_proj.weight": "llamaforcausallm__model__layers__56__mlp__gate_proj__weight", "model.layers.56.mlp.up_proj.weight": "llamaforcausallm__model__layers__56__mlp__up_proj__weight", "model.layers.56.mlp.down_proj.weight": "llamaforcausallm__model__layers__56__mlp__down_proj__weight", "model.layers.56.input_layernorm.weight": "llamaforcausallm__model__layers__56__input_layernorm__weight", "model.layers.56.post_attention_layernorm.weight": "llamaforcausallm__model__layers__56__post_attention_layernorm__weight", "model.layers.57.self_attn.q_proj.weight": "llamaforcausallm__model__layers__57__self_attn__q_proj__weight", "model.layers.57.self_attn.k_proj.weight": "llamaforcausallm__model__layers__57__self_attn__k_proj__weight", "model.layers.57.self_attn.v_proj.weight": "llamaforcausallm__model__layers__57__self_attn__v_proj__weight", "model.layers.57.self_attn.o_proj.weight": "llamaforcausallm__model__layers__57__self_attn__o_proj__weight", "model.layers.57.mlp.gate_proj.weight": "llamaforcausallm__model__layers__57__mlp__gate_proj__weight", "model.layers.57.mlp.up_proj.weight": "llamaforcausallm__model__layers__57__mlp__up_proj__weight", "model.layers.57.mlp.down_proj.weight": "llamaforcausallm__model__layers__57__mlp__down_proj__weight", "model.layers.57.input_layernorm.weight": "llamaforcausallm__model__layers__57__input_layernorm__weight", "model.layers.57.post_attention_layernorm.weight": "llamaforcausallm__model__layers__57__post_attention_layernorm__weight", "model.layers.58.self_attn.q_proj.weight": 
"llamaforcausallm__model__layers__58__self_attn__q_proj__weight", "model.layers.58.self_attn.k_proj.weight": "llamaforcausallm__model__layers__58__self_attn__k_proj__weight", "model.layers.58.self_attn.v_proj.weight": "llamaforcausallm__model__layers__58__self_attn__v_proj__weight", "model.layers.58.self_attn.o_proj.weight": "llamaforcausallm__model__layers__58__self_attn__o_proj__weight", "model.layers.58.mlp.gate_proj.weight": "llamaforcausallm__model__layers__58__mlp__gate_proj__weight", "model.layers.58.mlp.up_proj.weight": "llamaforcausallm__model__layers__58__mlp__up_proj__weight", "model.layers.58.mlp.down_proj.weight": "llamaforcausallm__model__layers__58__mlp__down_proj__weight", "model.layers.58.input_layernorm.weight": "llamaforcausallm__model__layers__58__input_layernorm__weight", "model.layers.58.post_attention_layernorm.weight": "llamaforcausallm__model__layers__58__post_attention_layernorm__weight", "model.layers.59.self_attn.q_proj.weight": "llamaforcausallm__model__layers__59__self_attn__q_proj__weight", "model.layers.59.self_attn.k_proj.weight": "llamaforcausallm__model__layers__59__self_attn__k_proj__weight", "model.layers.59.self_attn.v_proj.weight": "llamaforcausallm__model__layers__59__self_attn__v_proj__weight", "model.layers.59.self_attn.o_proj.weight": "llamaforcausallm__model__layers__59__self_attn__o_proj__weight", "model.layers.59.mlp.gate_proj.weight": "llamaforcausallm__model__layers__59__mlp__gate_proj__weight", "model.layers.59.mlp.up_proj.weight": "llamaforcausallm__model__layers__59__mlp__up_proj__weight", "model.layers.59.mlp.down_proj.weight": "llamaforcausallm__model__layers__59__mlp__down_proj__weight", "model.layers.59.input_layernorm.weight": "llamaforcausallm__model__layers__59__input_layernorm__weight", "model.layers.59.post_attention_layernorm.weight": "llamaforcausallm__model__layers__59__post_attention_layernorm__weight", "model.layers.60.self_attn.q_proj.weight": "llamaforcausallm__model__layers__60__self_attn__q_proj__weight", "model.layers.60.self_attn.k_proj.weight": "llamaforcausallm__model__layers__60__self_attn__k_proj__weight", "model.layers.60.self_attn.v_proj.weight": "llamaforcausallm__model__layers__60__self_attn__v_proj__weight", "model.layers.60.self_attn.o_proj.weight": "llamaforcausallm__model__layers__60__self_attn__o_proj__weight", "model.layers.60.mlp.gate_proj.weight": "llamaforcausallm__model__layers__60__mlp__gate_proj__weight", "model.layers.60.mlp.up_proj.weight": "llamaforcausallm__model__layers__60__mlp__up_proj__weight", "model.layers.60.mlp.down_proj.weight": "llamaforcausallm__model__layers__60__mlp__down_proj__weight", "model.layers.60.input_layernorm.weight": "llamaforcausallm__model__layers__60__input_layernorm__weight", "model.layers.60.post_attention_layernorm.weight": "llamaforcausallm__model__layers__60__post_attention_layernorm__weight", "model.layers.61.self_attn.q_proj.weight": "llamaforcausallm__model__layers__61__self_attn__q_proj__weight", "model.layers.61.self_attn.k_proj.weight": "llamaforcausallm__model__layers__61__self_attn__k_proj__weight", "model.layers.61.self_attn.v_proj.weight": "llamaforcausallm__model__layers__61__self_attn__v_proj__weight", "model.layers.61.self_attn.o_proj.weight": "llamaforcausallm__model__layers__61__self_attn__o_proj__weight", "model.layers.61.mlp.gate_proj.weight": "llamaforcausallm__model__layers__61__mlp__gate_proj__weight", "model.layers.61.mlp.up_proj.weight": "llamaforcausallm__model__layers__61__mlp__up_proj__weight", "model.layers.61.mlp.down_proj.weight": 
"llamaforcausallm__model__layers__61__mlp__down_proj__weight", "model.layers.61.input_layernorm.weight": "llamaforcausallm__model__layers__61__input_layernorm__weight", "model.layers.61.post_attention_layernorm.weight": "llamaforcausallm__model__layers__61__post_attention_layernorm__weight", "model.layers.62.self_attn.q_proj.weight": "llamaforcausallm__model__layers__62__self_attn__q_proj__weight", "model.layers.62.self_attn.k_proj.weight": "llamaforcausallm__model__layers__62__self_attn__k_proj__weight", "model.layers.62.self_attn.v_proj.weight": "llamaforcausallm__model__layers__62__self_attn__v_proj__weight", "model.layers.62.self_attn.o_proj.weight": "llamaforcausallm__model__layers__62__self_attn__o_proj__weight", "model.layers.62.mlp.gate_proj.weight": "llamaforcausallm__model__layers__62__mlp__gate_proj__weight", "model.layers.62.mlp.up_proj.weight": "llamaforcausallm__model__layers__62__mlp__up_proj__weight", "model.layers.62.mlp.down_proj.weight": "llamaforcausallm__model__layers__62__mlp__down_proj__weight", "model.layers.62.input_layernorm.weight": "llamaforcausallm__model__layers__62__input_layernorm__weight", "model.layers.62.post_attention_layernorm.weight": "llamaforcausallm__model__layers__62__post_attention_layernorm__weight", "model.layers.63.self_attn.q_proj.weight": "llamaforcausallm__model__layers__63__self_attn__q_proj__weight", "model.layers.63.self_attn.k_proj.weight": "llamaforcausallm__model__layers__63__self_attn__k_proj__weight", "model.layers.63.self_attn.v_proj.weight": "llamaforcausallm__model__layers__63__self_attn__v_proj__weight", "model.layers.63.self_attn.o_proj.weight": "llamaforcausallm__model__layers__63__self_attn__o_proj__weight", "model.layers.63.mlp.gate_proj.weight": "llamaforcausallm__model__layers__63__mlp__gate_proj__weight", "model.layers.63.mlp.up_proj.weight": "llamaforcausallm__model__layers__63__mlp__up_proj__weight", "model.layers.63.mlp.down_proj.weight": "llamaforcausallm__model__layers__63__mlp__down_proj__weight", "model.layers.63.input_layernorm.weight": "llamaforcausallm__model__layers__63__input_layernorm__weight", "model.layers.63.post_attention_layernorm.weight": "llamaforcausallm__model__layers__63__post_attention_layernorm__weight", "model.layers.64.self_attn.q_proj.weight": "llamaforcausallm__model__layers__64__self_attn__q_proj__weight", "model.layers.64.self_attn.k_proj.weight": "llamaforcausallm__model__layers__64__self_attn__k_proj__weight", "model.layers.64.self_attn.v_proj.weight": "llamaforcausallm__model__layers__64__self_attn__v_proj__weight", "model.layers.64.self_attn.o_proj.weight": "llamaforcausallm__model__layers__64__self_attn__o_proj__weight", "model.layers.64.mlp.gate_proj.weight": "llamaforcausallm__model__layers__64__mlp__gate_proj__weight", "model.layers.64.mlp.up_proj.weight": "llamaforcausallm__model__layers__64__mlp__up_proj__weight", "model.layers.64.mlp.down_proj.weight": "llamaforcausallm__model__layers__64__mlp__down_proj__weight", "model.layers.64.input_layernorm.weight": "llamaforcausallm__model__layers__64__input_layernorm__weight", "model.layers.64.post_attention_layernorm.weight": "llamaforcausallm__model__layers__64__post_attention_layernorm__weight", "model.layers.65.self_attn.q_proj.weight": "llamaforcausallm__model__layers__65__self_attn__q_proj__weight", "model.layers.65.self_attn.k_proj.weight": "llamaforcausallm__model__layers__65__self_attn__k_proj__weight", "model.layers.65.self_attn.v_proj.weight": "llamaforcausallm__model__layers__65__self_attn__v_proj__weight", 
"model.layers.65.self_attn.o_proj.weight": "llamaforcausallm__model__layers__65__self_attn__o_proj__weight", "model.layers.65.mlp.gate_proj.weight": "llamaforcausallm__model__layers__65__mlp__gate_proj__weight", "model.layers.65.mlp.up_proj.weight": "llamaforcausallm__model__layers__65__mlp__up_proj__weight", "model.layers.65.mlp.down_proj.weight": "llamaforcausallm__model__layers__65__mlp__down_proj__weight", "model.layers.65.input_layernorm.weight": "llamaforcausallm__model__layers__65__input_layernorm__weight", "model.layers.65.post_attention_layernorm.weight": "llamaforcausallm__model__layers__65__post_attention_layernorm__weight", "model.layers.66.self_attn.q_proj.weight": "llamaforcausallm__model__layers__66__self_attn__q_proj__weight", "model.layers.66.self_attn.k_proj.weight": "llamaforcausallm__model__layers__66__self_attn__k_proj__weight", "model.layers.66.self_attn.v_proj.weight": "llamaforcausallm__model__layers__66__self_attn__v_proj__weight", "model.layers.66.self_attn.o_proj.weight": "llamaforcausallm__model__layers__66__self_attn__o_proj__weight", "model.layers.66.mlp.gate_proj.weight": "llamaforcausallm__model__layers__66__mlp__gate_proj__weight", "model.layers.66.mlp.up_proj.weight": "llamaforcausallm__model__layers__66__mlp__up_proj__weight", "model.layers.66.mlp.down_proj.weight": "llamaforcausallm__model__layers__66__mlp__down_proj__weight", "model.layers.66.input_layernorm.weight": "llamaforcausallm__model__layers__66__input_layernorm__weight", "model.layers.66.post_attention_layernorm.weight": "llamaforcausallm__model__layers__66__post_attention_layernorm__weight", "model.layers.67.self_attn.q_proj.weight": "llamaforcausallm__model__layers__67__self_attn__q_proj__weight", "model.layers.67.self_attn.k_proj.weight": "llamaforcausallm__model__layers__67__self_attn__k_proj__weight", "model.layers.67.self_attn.v_proj.weight": "llamaforcausallm__model__layers__67__self_attn__v_proj__weight", "model.layers.67.self_attn.o_proj.weight": "llamaforcausallm__model__layers__67__self_attn__o_proj__weight", "model.layers.67.mlp.gate_proj.weight": "llamaforcausallm__model__layers__67__mlp__gate_proj__weight", "model.layers.67.mlp.up_proj.weight": "llamaforcausallm__model__layers__67__mlp__up_proj__weight", "model.layers.67.mlp.down_proj.weight": "llamaforcausallm__model__layers__67__mlp__down_proj__weight", "model.layers.67.input_layernorm.weight": "llamaforcausallm__model__layers__67__input_layernorm__weight", "model.layers.67.post_attention_layernorm.weight": "llamaforcausallm__model__layers__67__post_attention_layernorm__weight", "model.layers.68.self_attn.q_proj.weight": "llamaforcausallm__model__layers__68__self_attn__q_proj__weight", "model.layers.68.self_attn.k_proj.weight": "llamaforcausallm__model__layers__68__self_attn__k_proj__weight", "model.layers.68.self_attn.v_proj.weight": "llamaforcausallm__model__layers__68__self_attn__v_proj__weight", "model.layers.68.self_attn.o_proj.weight": "llamaforcausallm__model__layers__68__self_attn__o_proj__weight", "model.layers.68.mlp.gate_proj.weight": "llamaforcausallm__model__layers__68__mlp__gate_proj__weight", "model.layers.68.mlp.up_proj.weight": "llamaforcausallm__model__layers__68__mlp__up_proj__weight", "model.layers.68.mlp.down_proj.weight": "llamaforcausallm__model__layers__68__mlp__down_proj__weight", "model.layers.68.input_layernorm.weight": "llamaforcausallm__model__layers__68__input_layernorm__weight", "model.layers.68.post_attention_layernorm.weight": 
"llamaforcausallm__model__layers__68__post_attention_layernorm__weight", "model.layers.69.self_attn.q_proj.weight": "llamaforcausallm__model__layers__69__self_attn__q_proj__weight", "model.layers.69.self_attn.k_proj.weight": "llamaforcausallm__model__layers__69__self_attn__k_proj__weight", "model.layers.69.self_attn.v_proj.weight": "llamaforcausallm__model__layers__69__self_attn__v_proj__weight", "model.layers.69.self_attn.o_proj.weight": "llamaforcausallm__model__layers__69__self_attn__o_proj__weight", "model.layers.69.mlp.gate_proj.weight": "llamaforcausallm__model__layers__69__mlp__gate_proj__weight", "model.layers.69.mlp.up_proj.weight": "llamaforcausallm__model__layers__69__mlp__up_proj__weight", "model.layers.69.mlp.down_proj.weight": "llamaforcausallm__model__layers__69__mlp__down_proj__weight", "model.layers.69.input_layernorm.weight": "llamaforcausallm__model__layers__69__input_layernorm__weight", "model.layers.69.post_attention_layernorm.weight": "llamaforcausallm__model__layers__69__post_attention_layernorm__weight", "model.layers.70.self_attn.q_proj.weight": "llamaforcausallm__model__layers__70__self_attn__q_proj__weight", "model.layers.70.self_attn.k_proj.weight": "llamaforcausallm__model__layers__70__self_attn__k_proj__weight", "model.layers.70.self_attn.v_proj.weight": "llamaforcausallm__model__layers__70__self_attn__v_proj__weight", "model.layers.70.self_attn.o_proj.weight": "llamaforcausallm__model__layers__70__self_attn__o_proj__weight", "model.layers.70.mlp.gate_proj.weight": "llamaforcausallm__model__layers__70__mlp__gate_proj__weight", "model.layers.70.mlp.up_proj.weight": "llamaforcausallm__model__layers__70__mlp__up_proj__weight", "model.layers.70.mlp.down_proj.weight": "llamaforcausallm__model__layers__70__mlp__down_proj__weight", "model.layers.70.input_layernorm.weight": "llamaforcausallm__model__layers__70__input_layernorm__weight", "model.layers.70.post_attention_layernorm.weight": "llamaforcausallm__model__layers__70__post_attention_layernorm__weight", "model.layers.71.self_attn.q_proj.weight": "llamaforcausallm__model__layers__71__self_attn__q_proj__weight", "model.layers.71.self_attn.k_proj.weight": "llamaforcausallm__model__layers__71__self_attn__k_proj__weight", "model.layers.71.self_attn.v_proj.weight": "llamaforcausallm__model__layers__71__self_attn__v_proj__weight", "model.layers.71.self_attn.o_proj.weight": "llamaforcausallm__model__layers__71__self_attn__o_proj__weight", "model.layers.71.mlp.gate_proj.weight": "llamaforcausallm__model__layers__71__mlp__gate_proj__weight", "model.layers.71.mlp.up_proj.weight": "llamaforcausallm__model__layers__71__mlp__up_proj__weight", "model.layers.71.mlp.down_proj.weight": "llamaforcausallm__model__layers__71__mlp__down_proj__weight", "model.layers.71.input_layernorm.weight": "llamaforcausallm__model__layers__71__input_layernorm__weight", "model.layers.71.post_attention_layernorm.weight": "llamaforcausallm__model__layers__71__post_attention_layernorm__weight", "model.layers.72.self_attn.q_proj.weight": "llamaforcausallm__model__layers__72__self_attn__q_proj__weight", "model.layers.72.self_attn.k_proj.weight": "llamaforcausallm__model__layers__72__self_attn__k_proj__weight", "model.layers.72.self_attn.v_proj.weight": "llamaforcausallm__model__layers__72__self_attn__v_proj__weight", "model.layers.72.self_attn.o_proj.weight": "llamaforcausallm__model__layers__72__self_attn__o_proj__weight", "model.layers.72.mlp.gate_proj.weight": "llamaforcausallm__model__layers__72__mlp__gate_proj__weight", 
"model.layers.72.mlp.up_proj.weight": "llamaforcausallm__model__layers__72__mlp__up_proj__weight", "model.layers.72.mlp.down_proj.weight": "llamaforcausallm__model__layers__72__mlp__down_proj__weight", "model.layers.72.input_layernorm.weight": "llamaforcausallm__model__layers__72__input_layernorm__weight", "model.layers.72.post_attention_layernorm.weight": "llamaforcausallm__model__layers__72__post_attention_layernorm__weight", "model.layers.73.self_attn.q_proj.weight": "llamaforcausallm__model__layers__73__self_attn__q_proj__weight", "model.layers.73.self_attn.k_proj.weight": "llamaforcausallm__model__layers__73__self_attn__k_proj__weight", "model.layers.73.self_attn.v_proj.weight": "llamaforcausallm__model__layers__73__self_attn__v_proj__weight", "model.layers.73.self_attn.o_proj.weight": "llamaforcausallm__model__layers__73__self_attn__o_proj__weight", "model.layers.73.mlp.gate_proj.weight": "llamaforcausallm__model__layers__73__mlp__gate_proj__weight", "model.layers.73.mlp.up_proj.weight": "llamaforcausallm__model__layers__73__mlp__up_proj__weight", "model.layers.73.mlp.down_proj.weight": "llamaforcausallm__model__layers__73__mlp__down_proj__weight", "model.layers.73.input_layernorm.weight": "llamaforcausallm__model__layers__73__input_layernorm__weight", "model.layers.73.post_attention_layernorm.weight": "llamaforcausallm__model__layers__73__post_attention_layernorm__weight", "model.layers.74.self_attn.q_proj.weight": "llamaforcausallm__model__layers__74__self_attn__q_proj__weight", "model.layers.74.self_attn.k_proj.weight": "llamaforcausallm__model__layers__74__self_attn__k_proj__weight", "model.layers.74.self_attn.v_proj.weight": "llamaforcausallm__model__layers__74__self_attn__v_proj__weight", "model.layers.74.self_attn.o_proj.weight": "llamaforcausallm__model__layers__74__self_attn__o_proj__weight", "model.layers.74.mlp.gate_proj.weight": "llamaforcausallm__model__layers__74__mlp__gate_proj__weight", "model.layers.74.mlp.up_proj.weight": "llamaforcausallm__model__layers__74__mlp__up_proj__weight", "model.layers.74.mlp.down_proj.weight": "llamaforcausallm__model__layers__74__mlp__down_proj__weight", "model.layers.74.input_layernorm.weight": "llamaforcausallm__model__layers__74__input_layernorm__weight", "model.layers.74.post_attention_layernorm.weight": "llamaforcausallm__model__layers__74__post_attention_layernorm__weight", "model.layers.75.self_attn.q_proj.weight": "llamaforcausallm__model__layers__75__self_attn__q_proj__weight", "model.layers.75.self_attn.k_proj.weight": "llamaforcausallm__model__layers__75__self_attn__k_proj__weight", "model.layers.75.self_attn.v_proj.weight": "llamaforcausallm__model__layers__75__self_attn__v_proj__weight", "model.layers.75.self_attn.o_proj.weight": "llamaforcausallm__model__layers__75__self_attn__o_proj__weight", "model.layers.75.mlp.gate_proj.weight": "llamaforcausallm__model__layers__75__mlp__gate_proj__weight", "model.layers.75.mlp.up_proj.weight": "llamaforcausallm__model__layers__75__mlp__up_proj__weight", "model.layers.75.mlp.down_proj.weight": "llamaforcausallm__model__layers__75__mlp__down_proj__weight", "model.layers.75.input_layernorm.weight": "llamaforcausallm__model__layers__75__input_layernorm__weight", "model.layers.75.post_attention_layernorm.weight": "llamaforcausallm__model__layers__75__post_attention_layernorm__weight", "model.layers.76.self_attn.q_proj.weight": "llamaforcausallm__model__layers__76__self_attn__q_proj__weight", "model.layers.76.self_attn.k_proj.weight": 
"llamaforcausallm__model__layers__76__self_attn__k_proj__weight", "model.layers.76.self_attn.v_proj.weight": "llamaforcausallm__model__layers__76__self_attn__v_proj__weight", "model.layers.76.self_attn.o_proj.weight": "llamaforcausallm__model__layers__76__self_attn__o_proj__weight", "model.layers.76.mlp.gate_proj.weight": "llamaforcausallm__model__layers__76__mlp__gate_proj__weight", "model.layers.76.mlp.up_proj.weight": "llamaforcausallm__model__layers__76__mlp__up_proj__weight", "model.layers.76.mlp.down_proj.weight": "llamaforcausallm__model__layers__76__mlp__down_proj__weight", "model.layers.76.input_layernorm.weight": "llamaforcausallm__model__layers__76__input_layernorm__weight", "model.layers.76.post_attention_layernorm.weight": "llamaforcausallm__model__layers__76__post_attention_layernorm__weight", "model.layers.77.self_attn.q_proj.weight": "llamaforcausallm__model__layers__77__self_attn__q_proj__weight", "model.layers.77.self_attn.k_proj.weight": "llamaforcausallm__model__layers__77__self_attn__k_proj__weight", "model.layers.77.self_attn.v_proj.weight": "llamaforcausallm__model__layers__77__self_attn__v_proj__weight", "model.layers.77.self_attn.o_proj.weight": "llamaforcausallm__model__layers__77__self_attn__o_proj__weight", "model.layers.77.mlp.gate_proj.weight": "llamaforcausallm__model__layers__77__mlp__gate_proj__weight", "model.layers.77.mlp.up_proj.weight": "llamaforcausallm__model__layers__77__mlp__up_proj__weight", "model.layers.77.mlp.down_proj.weight": "llamaforcausallm__model__layers__77__mlp__down_proj__weight", "model.layers.77.input_layernorm.weight": "llamaforcausallm__model__layers__77__input_layernorm__weight", "model.layers.77.post_attention_layernorm.weight": "llamaforcausallm__model__layers__77__post_attention_layernorm__weight", "model.layers.78.self_attn.q_proj.weight": "llamaforcausallm__model__layers__78__self_attn__q_proj__weight", "model.layers.78.self_attn.k_proj.weight": "llamaforcausallm__model__layers__78__self_attn__k_proj__weight", "model.layers.78.self_attn.v_proj.weight": "llamaforcausallm__model__layers__78__self_attn__v_proj__weight", "model.layers.78.self_attn.o_proj.weight": "llamaforcausallm__model__layers__78__self_attn__o_proj__weight", "model.layers.78.mlp.gate_proj.weight": "llamaforcausallm__model__layers__78__mlp__gate_proj__weight", "model.layers.78.mlp.up_proj.weight": "llamaforcausallm__model__layers__78__mlp__up_proj__weight", "model.layers.78.mlp.down_proj.weight": "llamaforcausallm__model__layers__78__mlp__down_proj__weight", "model.layers.78.input_layernorm.weight": "llamaforcausallm__model__layers__78__input_layernorm__weight", "model.layers.78.post_attention_layernorm.weight": "llamaforcausallm__model__layers__78__post_attention_layernorm__weight", "model.layers.79.self_attn.q_proj.weight": "llamaforcausallm__model__layers__79__self_attn__q_proj__weight", "model.layers.79.self_attn.k_proj.weight": "llamaforcausallm__model__layers__79__self_attn__k_proj__weight", "model.layers.79.self_attn.v_proj.weight": "llamaforcausallm__model__layers__79__self_attn__v_proj__weight", "model.layers.79.self_attn.o_proj.weight": "llamaforcausallm__model__layers__79__self_attn__o_proj__weight", "model.layers.79.mlp.gate_proj.weight": "llamaforcausallm__model__layers__79__mlp__gate_proj__weight", "model.layers.79.mlp.up_proj.weight": "llamaforcausallm__model__layers__79__mlp__up_proj__weight", "model.layers.79.mlp.down_proj.weight": "llamaforcausallm__model__layers__79__mlp__down_proj__weight", "model.layers.79.input_layernorm.weight": 
"llamaforcausallm__model__layers__79__input_layernorm__weight", "model.layers.79.post_attention_layernorm.weight": "llamaforcausallm__model__layers__79__post_attention_layernorm__weight", "model.norm.weight": "llamaforcausallm__model__norm__weight", "lm_head.weight": "llamaforcausallm__lm_head__weight"}
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "unk_token": "<unk>"
+ }
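These three special tokens should agree with the bos_token, eos_token, and unk_token entries in tokenizer_config.json (added further below). A small consistency check, assuming both files sit in the current directory:

```python
import json

with open("special_tokens_map.json") as f:
    special = json.load(f)
with open("tokenizer_config.json") as f:
    config = json.load(f)

# The same three tokens are declared in both files; flag any divergence.
for key in ("bos_token", "eos_token", "unk_token"):
    assert special[key] == config[key], f"{key} differs between the two files"
```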
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae9d85f72e6d5ae1b8c6d0b53a1628b939973e135680d36da5c07e998eb25dea
+ size 873753
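tokenizer.model is stored through Git LFS, so the diff shows only the pointer file: the spec version, the sha256 oid of the real blob, and its size in bytes. A quick integrity check after downloading the actual file (expected values copied from the pointer above):

```python
import hashlib
from pathlib import Path

EXPECTED_OID = "ae9d85f72e6d5ae1b8c6d0b53a1628b939973e135680d36da5c07e998eb25dea"
EXPECTED_SIZE = 873753  # bytes, from the "size" line of the pointer

blob = Path("tokenizer.model").read_bytes()
assert len(blob) == EXPECTED_SIZE, "size mismatch"
assert hashlib.sha256(blob).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("tokenizer.model matches its LFS pointer")
```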
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
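With tokenizer.model, tokenizer_config.json, and special_tokens_map.json in place, the tokenizer loads as a LlamaTokenizer (per tokenizer_class above). A minimal usage sketch, assuming the files have been downloaded to a local directory (the path is illustrative); note that "add_bos_token": true and "add_eos_token": false mean encoding prepends <s> but does not append </s>:

```python
from transformers import AutoTokenizer

# Illustrative local path containing the tokenizer files from this commit;
# use_fast=False selects the SentencePiece-backed LlamaTokenizer.
tok = AutoTokenizer.from_pretrained("./checkpoint_dir", use_fast=False)

ids = tok.encode("Hello")
tokens = tok.convert_ids_to_tokens(ids)
# add_bos_token=true, add_eos_token=false: expect something like
# ['<s>', '▁Hello'] with no trailing '</s>'.
print(tokens)
```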