Add files using upload-large-folder tool
Browse files- README.md +87 -0
- config.json +38 -0
- huggingface-metadata.txt +56 -0
- model.safetensors.index.json +1 -0
- output-00001-of-00010.safetensors +3 -0
- output-00002-of-00010.safetensors +3 -0
- output-00003-of-00010.safetensors +3 -0
- output-00004-of-00010.safetensors +3 -0
- output-00005-of-00010.safetensors +3 -0
- output-00006-of-00010.safetensors +3 -0
- output-00007-of-00010.safetensors +3 -0
- output-00008-of-00010.safetensors +3 -0
- output-00009-of-00010.safetensors +3 -0
- output-00010-of-00010.safetensors +3 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
README.md
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: other
|
3 |
+
---
|
4 |
+
# Join our Discord! https://discord.gg/Nbv9pQ88Xb
|
5 |
+
## Nearly 2500 members strong 💪
|
6 |
+
### Now with more channels! A hub for creatives and makers alike!
|
7 |
+
---
|
8 |
+
|
9 |
+
[BeaverAI](https://huggingface.co/BeaverAI) proudly presents...
|
10 |
+
|
11 |
+
# Behemoth 123B v2.2 🦣
|
12 |
+
|
13 |
+
> Nothing in the void is foreign to us. The place we go is the place we belong.
|
14 |
+
|
15 |
+

|
16 |
+
|
17 |
+
## Links
|
18 |
+
- Original: https://huggingface.co/TheDrummer/Behemoth-123B-v2.2
|
19 |
+
- GGUF: https://huggingface.co/TheDrummer/Behemoth-123B-v2.2-GGUF
|
20 |
+
- iMatrix: https://huggingface.co/bartowski/Behemoth-123B-v2.2-GGUF (recommended for smaller quants)
|
21 |
+
|
22 |
+
## Description
|
23 |
+
|
24 |
+
Behemoth v2.x is a finetune of the new Largestral 2411 with system prompt support. Testers have noted that **everything** felt improved.
|
25 |
+
|
26 |
+
### Usage
|
27 |
+
Testers say this frankenformat maximizes the model's potential: **Metharme** with Mistral's new system tokens
|
28 |
+
- `[SYSTEM_PROMPT] <|system|>{{system_message}}[/SYSTEM_PROMPT]<|user|>{{user_message}}<|model|>{{assistant_message}}`
|
29 |
+
- `<|system|>[SYSTEM_PROMPT] {{system_message}}[/SYSTEM_PROMPT]<|user|>{{user_message}}<|model|>{{assistant_message}}`
|
30 |
+
|
31 |
+
*Take note that the opening system tag SHOULD ALWAYS have a whitespace after it.*
|
32 |
+
|
33 |
+
Complete SillyTavern Settings in BeaverAI Club: https://discord.com/channels/1238219753324281886/1309968730301792370/1309968730301792370
|
34 |
+
|
35 |
+
### Versions
|
36 |
+
- [v2.0](https://huggingface.co/TheDrummer/Behemoth-123B-v2) is equivalent to Behemoth v1.0 (Classic)
|
37 |
+
- [v2.1](https://huggingface.co/TheDrummer/Behemoth-123B-v2.1) is equivalent to Behemoth v1.1 (Creative Boost)
|
38 |
+
- [v2.2](https://huggingface.co/TheDrummer/Behemoth-123B-v2.2) is an improvement of Behemoth v2.1 (Creative++)
|
39 |
+
|
40 |
+
## Special Thanks
|
41 |
+
|
42 |
+
Thank you to each and every one of you who donated/subscribed on [Ko-Fi](https://ko-fi.com/thedrummer) 🙇 I hope to never disappoint!
|
43 |
+
|
44 |
+
```
|
45 |
+
Toasty Pigeon
|
46 |
+
theguywhogamesalot
|
47 |
+
Grozi
|
48 |
+
F
|
49 |
+
Marinara
|
50 |
+
Ko-fi Supporter
|
51 |
+
Grozi
|
52 |
+
Phaelon
|
53 |
+
ONTHEREDTEAM
|
54 |
+
EvarinSharath'fe(USM-Valor)
|
55 |
+
Silva
|
56 |
+
Dakkidaze
|
57 |
+
AlexTheVP
|
58 |
+
Pseudo
|
59 |
+
Kistara
|
60 |
+
Dr. Fjut
|
61 |
+
Grozi 🥈
|
62 |
+
KinjiHakari777
|
63 |
+
dustywintr
|
64 |
+
Syd
|
65 |
+
HumbleConsumer
|
66 |
+
Syd
|
67 |
+
Ko-fi Supporter
|
68 |
+
Arkamist
|
69 |
+
joe 🥇
|
70 |
+
Toad
|
71 |
+
Lied
|
72 |
+
Konnect
|
73 |
+
Kistara
|
74 |
+
Grozi 🥉
|
75 |
+
SleepDeprived3
|
76 |
+
Luigi
|
77 |
+
Nestor
|
78 |
+
```
|
79 |
+
|
80 |
+
https://ko-fi.com/thedrummer/leaderboard
|
81 |
+
|
82 |
+
```
|
83 |
+
Finetuned by yours truly,
|
84 |
+
Drummer
|
85 |
+
```
|
86 |
+
|
87 |
+

|
config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "merged/BEHEMOTH-SLERP",
|
3 |
+
"architectures": [
|
4 |
+
"MistralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"head_dim": 128,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 12288,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 28672,
|
14 |
+
"max_position_embeddings": 131072,
|
15 |
+
"model_type": "mistral",
|
16 |
+
"num_attention_heads": 96,
|
17 |
+
"num_hidden_layers": 88,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"rms_norm_eps": 1e-05,
|
20 |
+
"rope_theta": 1000000.0,
|
21 |
+
"sliding_window": null,
|
22 |
+
"tie_word_embeddings": false,
|
23 |
+
"torch_dtype": "bfloat16",
|
24 |
+
"transformers_version": "4.46.3",
|
25 |
+
"use_cache": true,
|
26 |
+
"vocab_size": 32768,
|
27 |
+
"quantization_config": {
|
28 |
+
"quant_method": "exl2",
|
29 |
+
"version": "0.2.4",
|
30 |
+
"bits": 5.0,
|
31 |
+
"head_bits": 6,
|
32 |
+
"calibration": {
|
33 |
+
"rows": 115,
|
34 |
+
"length": 2048,
|
35 |
+
"dataset": "(default)"
|
36 |
+
}
|
37 |
+
}
|
38 |
+
}
|
huggingface-metadata.txt
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
url: https://huggingface.co/TheDrummer/Behemoth-123B-v2.2
|
2 |
+
branch: main
|
3 |
+
download date: 2024-11-24 15:28:55
|
4 |
+
sha256sum:
|
5 |
+
ece48c867a01372aa654a9d1ee83b9cff1cbc3c67a16b8ca1ea2e761bd42d74d model-00001-of-00051.safetensors
|
6 |
+
0fd214afbb8c63f53bd52b2332c507f0c5cdf8d2eeb31fd898bfe5aade0298d8 model-00002-of-00051.safetensors
|
7 |
+
a8a83f52644bb40381ea6439f8e3216b9ce4acf7418d0aae191a6ee25c2a28cb model-00003-of-00051.safetensors
|
8 |
+
1d524853774216078561240d8d118229337f980ba7b79d56db63091997e0b1a0 model-00004-of-00051.safetensors
|
9 |
+
772ef1633c42032592fc29e1ae4eb69c991e6cf4ca6c92694e8b01868a9a00be model-00005-of-00051.safetensors
|
10 |
+
a5f0d5296d32dcb651ddb2699d3eef2a3f0f21f0d71d0089cb14a94f15ed2341 model-00006-of-00051.safetensors
|
11 |
+
4af2753f431b3e564e1e57aaea52840b7edd287ebd1ba156ba7ea58862971719 model-00007-of-00051.safetensors
|
12 |
+
e3c52a1e6bd1f9b0e8b58d3821e49c41f808efe6419366d566652fa7102a4f46 model-00008-of-00051.safetensors
|
13 |
+
2d4e3d39ea6bfbe49dad337d6baa7cc5ba00ddbcf7046cdab62fd6a8c9cadd00 model-00009-of-00051.safetensors
|
14 |
+
37c3f9a488973aa688124a7c4931764fa33fb0a0ac4fc056b914a3eaf6799653 model-00010-of-00051.safetensors
|
15 |
+
009833c74b194babf313ed219f1888d379339f01ae7a0a7521690952c9cd1f21 model-00011-of-00051.safetensors
|
16 |
+
df866f417dec4f6d95975c977d6cb8f668b712abd758511536cd1207e24df4dc model-00012-of-00051.safetensors
|
17 |
+
6d74bbeb80fed6a979f95f2e6b62f29b5f84c06b8483916f29005fc42017ff02 model-00013-of-00051.safetensors
|
18 |
+
c8d8cd78bc6f24d424a53b6b845a5875073df7f6102d652e5a3dbbd3d38b6858 model-00014-of-00051.safetensors
|
19 |
+
d683c76a6145293532895eea10b928532b9d10fd3a8a5fcf85f9a20ada6bc344 model-00015-of-00051.safetensors
|
20 |
+
1d20830d156471457329580b4ff7f30cdbcec7fedb9269dd121d3af374938748 model-00016-of-00051.safetensors
|
21 |
+
85cf56aad11b204514db0ecba72ea039a0eda0093ceb32d49130bbb23ce8153d model-00017-of-00051.safetensors
|
22 |
+
6e91eaafc690f2e63fb5e18adf7f8be482347cc5f404021165d90ad36c0696a0 model-00018-of-00051.safetensors
|
23 |
+
7467ca97a3284903b713ee9d3220c3636bfb8a00f846dd1d7e29d1f62d647b2d model-00019-of-00051.safetensors
|
24 |
+
be9089795635482b1b644d6dea27629c74ac0dd4fd292e9b7c25a791812fe7ee model-00020-of-00051.safetensors
|
25 |
+
ea6cea0a74baa40443024e7c61a6ef90959cddd1e46fdd62d9a785dcdca69f6c model-00021-of-00051.safetensors
|
26 |
+
25583f97f78201a49b43f2f12888d72a2e6ad358abd46cde2bfa19d0ee561d7a model-00022-of-00051.safetensors
|
27 |
+
fbabb0727cef40cac2a42c1e7669df7bc0898088936d2c095d0413cfd54f3612 model-00023-of-00051.safetensors
|
28 |
+
aa90c86ebfdefcee8671a0b7443d5f38ab4a46b5e1f1c89aa24283a18cb57be7 model-00024-of-00051.safetensors
|
29 |
+
c9b63418054ae04e421f78c240cf1b7331565aca9a512e7bd022642b0fc0584f model-00025-of-00051.safetensors
|
30 |
+
61355db009ca377dc332b88e60bb9db8abbc5900327b6ed5902a114768476565 model-00026-of-00051.safetensors
|
31 |
+
cde464f114b99c520bf140571fcf42828a29bd41539c4e643caab9641b096c5e model-00027-of-00051.safetensors
|
32 |
+
b759d68150c637196b46e4a6dccddba0e5de9df28efc255355088e7267d4fc17 model-00028-of-00051.safetensors
|
33 |
+
233a12355b66af9ee40f17fe906f80ef0d57c8f9fdc3cf9bcf3e6ee307a71823 model-00029-of-00051.safetensors
|
34 |
+
c5847ab866185086728532351c033b8bc8b9862c595ec807b565ae83d309b006 model-00030-of-00051.safetensors
|
35 |
+
d69859b3e2d097ed89305eefd363d00419d4a1e08f535af716672d4a1551d527 model-00031-of-00051.safetensors
|
36 |
+
83d4e050ab847e65d2c9ef21f2adf186e3e34cb9c761986771cb8b44503d83cc model-00032-of-00051.safetensors
|
37 |
+
1b45f92205ce5e1d3b3ec7ca15424dfad84061d274c8eace305af1182dbee49b model-00033-of-00051.safetensors
|
38 |
+
f272689c80f9b0168c7070da51fed928000f8e07f236540062644e278ddb2425 model-00034-of-00051.safetensors
|
39 |
+
9d34d7a81d9abc4f3df2c7a1d28694f32e99e51cd84fdce1b61d91a1c02677f9 model-00035-of-00051.safetensors
|
40 |
+
8e6f40956ce804f557d0b125a59d165401e969ba33b5b81e76d3506129bb6b62 model-00036-of-00051.safetensors
|
41 |
+
00dc6efb192b8e457a568ce6a9384b670274c5cacbd698a8dc9170b58554551e model-00037-of-00051.safetensors
|
42 |
+
2f03da62d3b48d8bf788af17e965bf6efa727e2c23b4d29276bd5b5ddd48449c model-00038-of-00051.safetensors
|
43 |
+
c717fe6317dda57662317765030e02b8ea56dcd062e5707e7bfd93f5d34b10b0 model-00039-of-00051.safetensors
|
44 |
+
c1618b8061a647038db8fdad9d0d71fb97c4fe2e4bad0111aa641f19cf6b1ff7 model-00040-of-00051.safetensors
|
45 |
+
885628db4e12c651fc6f2edea2585ce4289b76298f2b9acb5d8c6a91af45895b model-00041-of-00051.safetensors
|
46 |
+
91c890a31a89bba2bbece1a81bbf4e03b49330c51d88910ee00eefa260d2521b model-00042-of-00051.safetensors
|
47 |
+
0dff5e390dcf6620a78319de9e15f5dec40bc2d849736101650ac57e0d1b9e52 model-00043-of-00051.safetensors
|
48 |
+
f8a8895c971e8db86bd4ce83ae2b1a11dd0754bea69120ce5e566f8cb1e22a87 model-00044-of-00051.safetensors
|
49 |
+
084c1ca7fbbdbdee0d1c740621afa032bc898945b84f8e6672283eb6544aa460 model-00045-of-00051.safetensors
|
50 |
+
a68827f058fd470e5ff0a2cc4783736f116c9dec26c840de1d8989cfa0519330 model-00046-of-00051.safetensors
|
51 |
+
0ac62203e1bb4041cb1c15ff1887deb1f2fd457cecbd1b9649cbf5fe15efbb03 model-00047-of-00051.safetensors
|
52 |
+
c6e99765a6a3da93506210a4ae10f2243cdc323a80300bf93eb85490c92a7db7 model-00048-of-00051.safetensors
|
53 |
+
d6f4daf4ba5f2f6c158a50f27657d319b1c5ea14cfcf3c1630cec6a327c3235a model-00049-of-00051.safetensors
|
54 |
+
f4174675db2645ff160b4e35cb860e460672c6bac41f19e1c636ae5a1dcf90bc model-00050-of-00051.safetensors
|
55 |
+
b02f04612938520f4598b27dbf7fc3ecc4aeb2db12660acc5a394b850d601aaf model-00051-of-00051.safetensors
|
56 |
+
1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a tokenizer.model
|
model.safetensors.index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata": {"mergekit_version": "0.0.5.1", "total_size": 245220139008}, "weight_map": {"lm_head.weight": "model-00001-of-00051.safetensors", "model.embed_tokens.weight": "model-00001-of-00051.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00051.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00001-of-00051.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00051.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00001-of-00051.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00051.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00051.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00051.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00051.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00051.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00051.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00002-of-00051.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00051.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00002-of-00051.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00051.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00051.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00051.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00051.safetensors", "model.layers.10.input_layernorm.weight": "model-00002-of-00051.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00002-of-00051.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00051.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00002-of-00051.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", 
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00051.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00051.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00051.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.input_layernorm.weight": "model-00003-of-00051.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00051.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00051.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00051.safetensors", "model.layers.12.input_layernorm.weight": "model-00003-of-00051.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00003-of-00051.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00051.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00004-of-00051.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00051.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00004-of-00051.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00004-of-00051.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00004-of-00051.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00004-of-00051.safetensors", "model.layers.13.input_layernorm.weight": "model-00004-of-00051.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00004-of-00051.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00051.safetensors", "model.layers.13.mlp.up_proj.weight": 
"model-00004-of-00051.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00051.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00051.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00051.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00051.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00051.safetensors", "model.layers.14.input_layernorm.weight": "model-00004-of-00051.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00004-of-00051.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00005-of-00051.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00005-of-00051.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00005-of-00051.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00005-of-00051.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00005-of-00051.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.input_layernorm.weight": "model-00005-of-00051.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00005-of-00051.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00005-of-00051.safetensors", "model.layers.16.input_layernorm.weight": "model-00005-of-00051.safetensors", "model.layers.16.mlp.down_proj.weight": 
"model-00006-of-00051.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00006-of-00051.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00006-of-00051.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00006-of-00051.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00006-of-00051.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00006-of-00051.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00006-of-00051.safetensors", "model.layers.17.input_layernorm.weight": "model-00006-of-00051.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00006-of-00051.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00006-of-00051.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00006-of-00051.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00006-of-00051.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00051.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00051.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.input_layernorm.weight": "model-00007-of-00051.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00007-of-00051.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00051.safetensors", "model.layers.18.self_attn.v_proj.weight": 
"model-00007-of-00051.safetensors", "model.layers.19.input_layernorm.weight": "model-00007-of-00051.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00007-of-00051.safetensors", "model.layers.19.mlp.gate_proj.weight": "model-00007-of-00051.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00008-of-00051.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00051.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00051.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00051.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00051.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.input_layernorm.weight": "model-00008-of-00051.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00008-of-00051.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00008-of-00051.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00008-of-00051.safetensors", "model.layers.20.input_layernorm.weight": "model-00008-of-00051.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00008-of-00051.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00009-of-00051.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00009-of-00051.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00009-of-00051.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00009-of-00051.safetensors", 
"model.layers.20.self_attn.q_proj.weight": "model-00009-of-00051.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.input_layernorm.weight": "model-00009-of-00051.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00009-of-00051.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00009-of-00051.safetensors", "model.layers.22.input_layernorm.weight": "model-00009-of-00051.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00010-of-00051.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00010-of-00051.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00010-of-00051.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00010-of-00051.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00010-of-00051.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00010-of-00051.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00010-of-00051.safetensors", "model.layers.23.input_layernorm.weight": "model-00010-of-00051.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00010-of-00051.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00010-of-00051.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00010-of-00051.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", 
"model.layers.23.self_attn.k_proj.weight": "model-00010-of-00051.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00011-of-00051.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00011-of-00051.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.input_layernorm.weight": "model-00011-of-00051.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00011-of-00051.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00011-of-00051.safetensors", "model.layers.25.input_layernorm.weight": "model-00011-of-00051.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00011-of-00051.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00011-of-00051.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00012-of-00051.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00012-of-00051.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00012-of-00051.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00012-of-00051.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00012-of-00051.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00012-of-00051.safetensors", "model.layers.26.input_layernorm.weight": "model-00012-of-00051.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00012-of-00051.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00012-of-00051.safetensors", "model.layers.26.mlp.up_proj.weight": 
"model-00012-of-00051.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00012-of-00051.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00012-of-00051.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00012-of-00051.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00012-of-00051.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00012-of-00051.safetensors", "model.layers.27.input_layernorm.weight": "model-00012-of-00051.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00012-of-00051.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00013-of-00051.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00013-of-00051.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00013-of-00051.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00013-of-00051.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00013-of-00051.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.input_layernorm.weight": "model-00013-of-00051.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.mlp.gate_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.mlp.up_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00013-of-00051.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00013-of-00051.safetensors", "model.layers.29.input_layernorm.weight": "model-00013-of-00051.safetensors", "model.layers.29.mlp.down_proj.weight": 
"model-00014-of-00051.safetensors", "model.layers.29.mlp.gate_proj.weight": "model-00014-of-00051.safetensors", "model.layers.29.mlp.up_proj.weight": "model-00014-of-00051.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00014-of-00051.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00014-of-00051.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00014-of-00051.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00014-of-00051.safetensors", "model.layers.3.input_layernorm.weight": "model-00014-of-00051.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00014-of-00051.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00014-of-00051.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00014-of-00051.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00014-of-00051.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00015-of-00051.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00015-of-00051.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.input_layernorm.weight": "model-00015-of-00051.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.mlp.gate_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.mlp.up_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00015-of-00051.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00015-of-00051.safetensors", 
"model.layers.31.input_layernorm.weight": "model-00015-of-00051.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00015-of-00051.safetensors", "model.layers.31.mlp.gate_proj.weight": "model-00015-of-00051.safetensors", "model.layers.31.mlp.up_proj.weight": "model-00016-of-00051.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00016-of-00051.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00016-of-00051.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00016-of-00051.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00016-of-00051.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.input_layernorm.weight": "model-00016-of-00051.safetensors", "model.layers.32.mlp.down_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.mlp.gate_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.mlp.up_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00016-of-00051.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00016-of-00051.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00016-of-00051.safetensors", "model.layers.33.input_layernorm.weight": "model-00016-of-00051.safetensors", "model.layers.33.mlp.down_proj.weight": "model-00016-of-00051.safetensors", "model.layers.33.mlp.gate_proj.weight": "model-00017-of-00051.safetensors", "model.layers.33.mlp.up_proj.weight": "model-00017-of-00051.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00017-of-00051.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00017-of-00051.safetensors", 
"model.layers.33.self_attn.q_proj.weight": "model-00017-of-00051.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.input_layernorm.weight": "model-00017-of-00051.safetensors", "model.layers.34.mlp.down_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.mlp.gate_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.mlp.up_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00017-of-00051.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00017-of-00051.safetensors", "model.layers.35.input_layernorm.weight": "model-00017-of-00051.safetensors", "model.layers.35.mlp.down_proj.weight": "model-00018-of-00051.safetensors", "model.layers.35.mlp.gate_proj.weight": "model-00018-of-00051.safetensors", "model.layers.35.mlp.up_proj.weight": "model-00018-of-00051.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00018-of-00051.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00018-of-00051.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00018-of-00051.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00018-of-00051.safetensors", "model.layers.36.input_layernorm.weight": "model-00018-of-00051.safetensors", "model.layers.36.mlp.down_proj.weight": "model-00018-of-00051.safetensors", "model.layers.36.mlp.gate_proj.weight": "model-00018-of-00051.safetensors", "model.layers.36.mlp.up_proj.weight": "model-00018-of-00051.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", 
"model.layers.36.self_attn.k_proj.weight": "model-00018-of-00051.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00019-of-00051.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00019-of-00051.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.input_layernorm.weight": "model-00019-of-00051.safetensors", "model.layers.37.mlp.down_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.mlp.gate_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.mlp.up_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.self_attn.q_proj.weight": "model-00019-of-00051.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00019-of-00051.safetensors", "model.layers.38.input_layernorm.weight": "model-00019-of-00051.safetensors", "model.layers.38.mlp.down_proj.weight": "model-00019-of-00051.safetensors", "model.layers.38.mlp.gate_proj.weight": "model-00019-of-00051.safetensors", "model.layers.38.mlp.up_proj.weight": "model-00020-of-00051.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00020-of-00051.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00020-of-00051.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00020-of-00051.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00020-of-00051.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00020-of-00051.safetensors", "model.layers.39.input_layernorm.weight": "model-00020-of-00051.safetensors", "model.layers.39.mlp.down_proj.weight": "model-00020-of-00051.safetensors", "model.layers.39.mlp.gate_proj.weight": "model-00020-of-00051.safetensors", "model.layers.39.mlp.up_proj.weight": 
"model-00020-of-00051.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00020-of-00051.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00020-of-00051.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00020-of-00051.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00020-of-00051.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00020-of-00051.safetensors", "model.layers.4.input_layernorm.weight": "model-00020-of-00051.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00020-of-00051.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00021-of-00051.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00021-of-00051.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00021-of-00051.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00021-of-00051.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00021-of-00051.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.input_layernorm.weight": "model-00021-of-00051.safetensors", "model.layers.40.mlp.down_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.mlp.gate_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.mlp.up_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.self_attn.o_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.self_attn.q_proj.weight": "model-00021-of-00051.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00021-of-00051.safetensors", "model.layers.41.input_layernorm.weight": "model-00021-of-00051.safetensors", "model.layers.41.mlp.down_proj.weight": "model-00022-of-00051.safetensors", 
"model.layers.41.mlp.gate_proj.weight": "model-00022-of-00051.safetensors", "model.layers.41.mlp.up_proj.weight": "model-00022-of-00051.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00022-of-00051.safetensors", "model.layers.41.self_attn.o_proj.weight": "model-00022-of-00051.safetensors", "model.layers.41.self_attn.q_proj.weight": "model-00022-of-00051.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00022-of-00051.safetensors", "model.layers.42.input_layernorm.weight": "model-00022-of-00051.safetensors", "model.layers.42.mlp.down_proj.weight": "model-00022-of-00051.safetensors", "model.layers.42.mlp.gate_proj.weight": "model-00022-of-00051.safetensors", "model.layers.42.mlp.up_proj.weight": "model-00022-of-00051.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00022-of-00051.safetensors", "model.layers.42.self_attn.o_proj.weight": "model-00023-of-00051.safetensors", "model.layers.42.self_attn.q_proj.weight": "model-00023-of-00051.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.input_layernorm.weight": "model-00023-of-00051.safetensors", "model.layers.43.mlp.down_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.mlp.gate_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.mlp.up_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.self_attn.o_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.self_attn.q_proj.weight": "model-00023-of-00051.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00023-of-00051.safetensors", 
"model.layers.44.input_layernorm.weight": "model-00023-of-00051.safetensors", "model.layers.44.mlp.down_proj.weight": "model-00023-of-00051.safetensors", "model.layers.44.mlp.gate_proj.weight": "model-00023-of-00051.safetensors", "model.layers.44.mlp.up_proj.weight": "model-00024-of-00051.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00024-of-00051.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00024-of-00051.safetensors", "model.layers.44.self_attn.o_proj.weight": "model-00024-of-00051.safetensors", "model.layers.44.self_attn.q_proj.weight": "model-00024-of-00051.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.input_layernorm.weight": "model-00024-of-00051.safetensors", "model.layers.45.mlp.down_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.mlp.gate_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.mlp.up_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00024-of-00051.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.self_attn.o_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.self_attn.q_proj.weight": "model-00024-of-00051.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00024-of-00051.safetensors", "model.layers.46.input_layernorm.weight": "model-00024-of-00051.safetensors", "model.layers.46.mlp.down_proj.weight": "model-00024-of-00051.safetensors", "model.layers.46.mlp.gate_proj.weight": "model-00025-of-00051.safetensors", "model.layers.46.mlp.up_proj.weight": "model-00025-of-00051.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00025-of-00051.safetensors", "model.layers.46.self_attn.o_proj.weight": "model-00025-of-00051.safetensors", 
"model.layers.46.self_attn.q_proj.weight": "model-00025-of-00051.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.input_layernorm.weight": "model-00025-of-00051.safetensors", "model.layers.47.mlp.down_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.mlp.gate_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.mlp.up_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.self_attn.o_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.self_attn.q_proj.weight": "model-00025-of-00051.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00025-of-00051.safetensors", "model.layers.48.input_layernorm.weight": "model-00025-of-00051.safetensors", "model.layers.48.mlp.down_proj.weight": "model-00026-of-00051.safetensors", "model.layers.48.mlp.gate_proj.weight": "model-00026-of-00051.safetensors", "model.layers.48.mlp.up_proj.weight": "model-00026-of-00051.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00026-of-00051.safetensors", "model.layers.48.self_attn.o_proj.weight": "model-00026-of-00051.safetensors", "model.layers.48.self_attn.q_proj.weight": "model-00026-of-00051.safetensors", "model.layers.48.self_attn.v_proj.weight": "model-00026-of-00051.safetensors", "model.layers.49.input_layernorm.weight": "model-00026-of-00051.safetensors", "model.layers.49.mlp.down_proj.weight": "model-00026-of-00051.safetensors", "model.layers.49.mlp.gate_proj.weight": "model-00026-of-00051.safetensors", "model.layers.49.mlp.up_proj.weight": "model-00026-of-00051.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", 
"model.layers.49.self_attn.k_proj.weight": "model-00026-of-00051.safetensors", "model.layers.49.self_attn.o_proj.weight": "model-00027-of-00051.safetensors", "model.layers.49.self_attn.q_proj.weight": "model-00027-of-00051.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.input_layernorm.weight": "model-00027-of-00051.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00027-of-00051.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00027-of-00051.safetensors", "model.layers.50.input_layernorm.weight": "model-00027-of-00051.safetensors", "model.layers.50.mlp.down_proj.weight": "model-00027-of-00051.safetensors", "model.layers.50.mlp.gate_proj.weight": "model-00027-of-00051.safetensors", "model.layers.50.mlp.up_proj.weight": "model-00028-of-00051.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00028-of-00051.safetensors", "model.layers.50.self_attn.k_proj.weight": "model-00028-of-00051.safetensors", "model.layers.50.self_attn.o_proj.weight": "model-00028-of-00051.safetensors", "model.layers.50.self_attn.q_proj.weight": "model-00028-of-00051.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00028-of-00051.safetensors", "model.layers.51.input_layernorm.weight": "model-00028-of-00051.safetensors", "model.layers.51.mlp.down_proj.weight": "model-00028-of-00051.safetensors", "model.layers.51.mlp.gate_proj.weight": "model-00028-of-00051.safetensors", "model.layers.51.mlp.up_proj.weight": 
"model-00028-of-00051.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00028-of-00051.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00028-of-00051.safetensors", "model.layers.51.self_attn.o_proj.weight": "model-00028-of-00051.safetensors", "model.layers.51.self_attn.q_proj.weight": "model-00028-of-00051.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00028-of-00051.safetensors", "model.layers.52.input_layernorm.weight": "model-00028-of-00051.safetensors", "model.layers.52.mlp.down_proj.weight": "model-00028-of-00051.safetensors", "model.layers.52.mlp.gate_proj.weight": "model-00029-of-00051.safetensors", "model.layers.52.mlp.up_proj.weight": "model-00029-of-00051.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00029-of-00051.safetensors", "model.layers.52.self_attn.o_proj.weight": "model-00029-of-00051.safetensors", "model.layers.52.self_attn.q_proj.weight": "model-00029-of-00051.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.input_layernorm.weight": "model-00029-of-00051.safetensors", "model.layers.53.mlp.down_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.mlp.gate_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.mlp.up_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.self_attn.o_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.self_attn.q_proj.weight": "model-00029-of-00051.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00029-of-00051.safetensors", "model.layers.54.input_layernorm.weight": "model-00029-of-00051.safetensors", "model.layers.54.mlp.down_proj.weight": 
"model-00030-of-00051.safetensors", "model.layers.54.mlp.gate_proj.weight": "model-00030-of-00051.safetensors", "model.layers.54.mlp.up_proj.weight": "model-00030-of-00051.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00030-of-00051.safetensors", "model.layers.54.self_attn.o_proj.weight": "model-00030-of-00051.safetensors", "model.layers.54.self_attn.q_proj.weight": "model-00030-of-00051.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00030-of-00051.safetensors", "model.layers.55.input_layernorm.weight": "model-00030-of-00051.safetensors", "model.layers.55.mlp.down_proj.weight": "model-00030-of-00051.safetensors", "model.layers.55.mlp.gate_proj.weight": "model-00030-of-00051.safetensors", "model.layers.55.mlp.up_proj.weight": "model-00030-of-00051.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", "model.layers.55.self_attn.k_proj.weight": "model-00030-of-00051.safetensors", "model.layers.55.self_attn.o_proj.weight": "model-00031-of-00051.safetensors", "model.layers.55.self_attn.q_proj.weight": "model-00031-of-00051.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.input_layernorm.weight": "model-00031-of-00051.safetensors", "model.layers.56.mlp.down_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.mlp.gate_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.mlp.up_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.self_attn.o_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.self_attn.q_proj.weight": "model-00031-of-00051.safetensors", "model.layers.56.self_attn.v_proj.weight": 
"model-00031-of-00051.safetensors", "model.layers.57.input_layernorm.weight": "model-00031-of-00051.safetensors", "model.layers.57.mlp.down_proj.weight": "model-00031-of-00051.safetensors", "model.layers.57.mlp.gate_proj.weight": "model-00031-of-00051.safetensors", "model.layers.57.mlp.up_proj.weight": "model-00032-of-00051.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00032-of-00051.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00032-of-00051.safetensors", "model.layers.57.self_attn.o_proj.weight": "model-00032-of-00051.safetensors", "model.layers.57.self_attn.q_proj.weight": "model-00032-of-00051.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.input_layernorm.weight": "model-00032-of-00051.safetensors", "model.layers.58.mlp.down_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.mlp.gate_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.mlp.up_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00032-of-00051.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.self_attn.o_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.self_attn.q_proj.weight": "model-00032-of-00051.safetensors", "model.layers.58.self_attn.v_proj.weight": "model-00032-of-00051.safetensors", "model.layers.59.input_layernorm.weight": "model-00032-of-00051.safetensors", "model.layers.59.mlp.down_proj.weight": "model-00032-of-00051.safetensors", "model.layers.59.mlp.gate_proj.weight": "model-00033-of-00051.safetensors", "model.layers.59.mlp.up_proj.weight": "model-00033-of-00051.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00033-of-00051.safetensors", "model.layers.59.self_attn.o_proj.weight": "model-00033-of-00051.safetensors", 
"model.layers.59.self_attn.q_proj.weight": "model-00033-of-00051.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.input_layernorm.weight": "model-00033-of-00051.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00033-of-00051.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00033-of-00051.safetensors", "model.layers.60.input_layernorm.weight": "model-00033-of-00051.safetensors", "model.layers.60.mlp.down_proj.weight": "model-00034-of-00051.safetensors", "model.layers.60.mlp.gate_proj.weight": "model-00034-of-00051.safetensors", "model.layers.60.mlp.up_proj.weight": "model-00034-of-00051.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00034-of-00051.safetensors", "model.layers.60.self_attn.o_proj.weight": "model-00034-of-00051.safetensors", "model.layers.60.self_attn.q_proj.weight": "model-00034-of-00051.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00034-of-00051.safetensors", "model.layers.61.input_layernorm.weight": "model-00034-of-00051.safetensors", "model.layers.61.mlp.down_proj.weight": "model-00034-of-00051.safetensors", "model.layers.61.mlp.gate_proj.weight": "model-00034-of-00051.safetensors", "model.layers.61.mlp.up_proj.weight": "model-00034-of-00051.safetensors", "model.layers.61.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", "model.layers.61.self_attn.k_proj.weight": 
"model-00034-of-00051.safetensors", "model.layers.61.self_attn.o_proj.weight": "model-00035-of-00051.safetensors", "model.layers.61.self_attn.q_proj.weight": "model-00035-of-00051.safetensors", "model.layers.61.self_attn.v_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.input_layernorm.weight": "model-00035-of-00051.safetensors", "model.layers.62.mlp.down_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.mlp.gate_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.mlp.up_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", "model.layers.62.self_attn.k_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.self_attn.o_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.self_attn.q_proj.weight": "model-00035-of-00051.safetensors", "model.layers.62.self_attn.v_proj.weight": "model-00035-of-00051.safetensors", "model.layers.63.input_layernorm.weight": "model-00035-of-00051.safetensors", "model.layers.63.mlp.down_proj.weight": "model-00035-of-00051.safetensors", "model.layers.63.mlp.gate_proj.weight": "model-00035-of-00051.safetensors", "model.layers.63.mlp.up_proj.weight": "model-00036-of-00051.safetensors", "model.layers.63.post_attention_layernorm.weight": "model-00036-of-00051.safetensors", "model.layers.63.self_attn.k_proj.weight": "model-00036-of-00051.safetensors", "model.layers.63.self_attn.o_proj.weight": "model-00036-of-00051.safetensors", "model.layers.63.self_attn.q_proj.weight": "model-00036-of-00051.safetensors", "model.layers.63.self_attn.v_proj.weight": "model-00036-of-00051.safetensors", "model.layers.64.input_layernorm.weight": "model-00036-of-00051.safetensors", "model.layers.64.mlp.down_proj.weight": "model-00036-of-00051.safetensors", "model.layers.64.mlp.gate_proj.weight": "model-00036-of-00051.safetensors", "model.layers.64.mlp.up_proj.weight": "model-00036-of-00051.safetensors", 
"model.layers.64.post_attention_layernorm.weight": "model-00036-of-00051.safetensors", "model.layers.64.self_attn.k_proj.weight": "model-00036-of-00051.safetensors", "model.layers.64.self_attn.o_proj.weight": "model-00036-of-00051.safetensors", "model.layers.64.self_attn.q_proj.weight": "model-00036-of-00051.safetensors", "model.layers.64.self_attn.v_proj.weight": "model-00036-of-00051.safetensors", "model.layers.65.input_layernorm.weight": "model-00036-of-00051.safetensors", "model.layers.65.mlp.down_proj.weight": "model-00036-of-00051.safetensors", "model.layers.65.mlp.gate_proj.weight": "model-00037-of-00051.safetensors", "model.layers.65.mlp.up_proj.weight": "model-00037-of-00051.safetensors", "model.layers.65.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", "model.layers.65.self_attn.k_proj.weight": "model-00037-of-00051.safetensors", "model.layers.65.self_attn.o_proj.weight": "model-00037-of-00051.safetensors", "model.layers.65.self_attn.q_proj.weight": "model-00037-of-00051.safetensors", "model.layers.65.self_attn.v_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.input_layernorm.weight": "model-00037-of-00051.safetensors", "model.layers.66.mlp.down_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.mlp.gate_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.mlp.up_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", "model.layers.66.self_attn.k_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.self_attn.o_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.self_attn.q_proj.weight": "model-00037-of-00051.safetensors", "model.layers.66.self_attn.v_proj.weight": "model-00037-of-00051.safetensors", "model.layers.67.input_layernorm.weight": "model-00037-of-00051.safetensors", "model.layers.67.mlp.down_proj.weight": "model-00038-of-00051.safetensors", 
"model.layers.67.mlp.gate_proj.weight": "model-00038-of-00051.safetensors", "model.layers.67.mlp.up_proj.weight": "model-00038-of-00051.safetensors", "model.layers.67.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", "model.layers.67.self_attn.k_proj.weight": "model-00038-of-00051.safetensors", "model.layers.67.self_attn.o_proj.weight": "model-00038-of-00051.safetensors", "model.layers.67.self_attn.q_proj.weight": "model-00038-of-00051.safetensors", "model.layers.67.self_attn.v_proj.weight": "model-00038-of-00051.safetensors", "model.layers.68.input_layernorm.weight": "model-00038-of-00051.safetensors", "model.layers.68.mlp.down_proj.weight": "model-00038-of-00051.safetensors", "model.layers.68.mlp.gate_proj.weight": "model-00038-of-00051.safetensors", "model.layers.68.mlp.up_proj.weight": "model-00038-of-00051.safetensors", "model.layers.68.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", "model.layers.68.self_attn.k_proj.weight": "model-00038-of-00051.safetensors", "model.layers.68.self_attn.o_proj.weight": "model-00039-of-00051.safetensors", "model.layers.68.self_attn.q_proj.weight": "model-00039-of-00051.safetensors", "model.layers.68.self_attn.v_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.input_layernorm.weight": "model-00039-of-00051.safetensors", "model.layers.69.mlp.down_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.mlp.gate_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.mlp.up_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", "model.layers.69.self_attn.k_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.self_attn.o_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.self_attn.q_proj.weight": "model-00039-of-00051.safetensors", "model.layers.69.self_attn.v_proj.weight": "model-00039-of-00051.safetensors", 
"model.layers.7.input_layernorm.weight": "model-00039-of-00051.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00039-of-00051.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00039-of-00051.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00040-of-00051.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00040-of-00051.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00040-of-00051.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00040-of-00051.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00040-of-00051.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.input_layernorm.weight": "model-00040-of-00051.safetensors", "model.layers.70.mlp.down_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.mlp.gate_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.mlp.up_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.post_attention_layernorm.weight": "model-00040-of-00051.safetensors", "model.layers.70.self_attn.k_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.self_attn.o_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.self_attn.q_proj.weight": "model-00040-of-00051.safetensors", "model.layers.70.self_attn.v_proj.weight": "model-00040-of-00051.safetensors", "model.layers.71.input_layernorm.weight": "model-00040-of-00051.safetensors", "model.layers.71.mlp.down_proj.weight": "model-00040-of-00051.safetensors", "model.layers.71.mlp.gate_proj.weight": "model-00041-of-00051.safetensors", "model.layers.71.mlp.up_proj.weight": "model-00041-of-00051.safetensors", "model.layers.71.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", "model.layers.71.self_attn.k_proj.weight": "model-00041-of-00051.safetensors", "model.layers.71.self_attn.o_proj.weight": "model-00041-of-00051.safetensors", "model.layers.71.self_attn.q_proj.weight": 
"model-00041-of-00051.safetensors", "model.layers.71.self_attn.v_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.input_layernorm.weight": "model-00041-of-00051.safetensors", "model.layers.72.mlp.down_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.mlp.gate_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.mlp.up_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", "model.layers.72.self_attn.k_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.self_attn.o_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.self_attn.q_proj.weight": "model-00041-of-00051.safetensors", "model.layers.72.self_attn.v_proj.weight": "model-00041-of-00051.safetensors", "model.layers.73.input_layernorm.weight": "model-00041-of-00051.safetensors", "model.layers.73.mlp.down_proj.weight": "model-00042-of-00051.safetensors", "model.layers.73.mlp.gate_proj.weight": "model-00042-of-00051.safetensors", "model.layers.73.mlp.up_proj.weight": "model-00042-of-00051.safetensors", "model.layers.73.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", "model.layers.73.self_attn.k_proj.weight": "model-00042-of-00051.safetensors", "model.layers.73.self_attn.o_proj.weight": "model-00042-of-00051.safetensors", "model.layers.73.self_attn.q_proj.weight": "model-00042-of-00051.safetensors", "model.layers.73.self_attn.v_proj.weight": "model-00042-of-00051.safetensors", "model.layers.74.input_layernorm.weight": "model-00042-of-00051.safetensors", "model.layers.74.mlp.down_proj.weight": "model-00042-of-00051.safetensors", "model.layers.74.mlp.gate_proj.weight": "model-00042-of-00051.safetensors", "model.layers.74.mlp.up_proj.weight": "model-00042-of-00051.safetensors", "model.layers.74.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", "model.layers.74.self_attn.k_proj.weight": "model-00042-of-00051.safetensors", 
"model.layers.74.self_attn.o_proj.weight": "model-00043-of-00051.safetensors", "model.layers.74.self_attn.q_proj.weight": "model-00043-of-00051.safetensors", "model.layers.74.self_attn.v_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.input_layernorm.weight": "model-00043-of-00051.safetensors", "model.layers.75.mlp.down_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.mlp.gate_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.mlp.up_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", "model.layers.75.self_attn.k_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.self_attn.o_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.self_attn.q_proj.weight": "model-00043-of-00051.safetensors", "model.layers.75.self_attn.v_proj.weight": "model-00043-of-00051.safetensors", "model.layers.76.input_layernorm.weight": "model-00043-of-00051.safetensors", "model.layers.76.mlp.down_proj.weight": "model-00043-of-00051.safetensors", "model.layers.76.mlp.gate_proj.weight": "model-00043-of-00051.safetensors", "model.layers.76.mlp.up_proj.weight": "model-00044-of-00051.safetensors", "model.layers.76.post_attention_layernorm.weight": "model-00044-of-00051.safetensors", "model.layers.76.self_attn.k_proj.weight": "model-00044-of-00051.safetensors", "model.layers.76.self_attn.o_proj.weight": "model-00044-of-00051.safetensors", "model.layers.76.self_attn.q_proj.weight": "model-00044-of-00051.safetensors", "model.layers.76.self_attn.v_proj.weight": "model-00044-of-00051.safetensors", "model.layers.77.input_layernorm.weight": "model-00044-of-00051.safetensors", "model.layers.77.mlp.down_proj.weight": "model-00044-of-00051.safetensors", "model.layers.77.mlp.gate_proj.weight": "model-00044-of-00051.safetensors", "model.layers.77.mlp.up_proj.weight": "model-00044-of-00051.safetensors", 
"model.layers.77.post_attention_layernorm.weight": "model-00044-of-00051.safetensors", "model.layers.77.self_attn.k_proj.weight": "model-00044-of-00051.safetensors", "model.layers.77.self_attn.o_proj.weight": "model-00044-of-00051.safetensors", "model.layers.77.self_attn.q_proj.weight": "model-00044-of-00051.safetensors", "model.layers.77.self_attn.v_proj.weight": "model-00044-of-00051.safetensors", "model.layers.78.input_layernorm.weight": "model-00044-of-00051.safetensors", "model.layers.78.mlp.down_proj.weight": "model-00044-of-00051.safetensors", "model.layers.78.mlp.gate_proj.weight": "model-00045-of-00051.safetensors", "model.layers.78.mlp.up_proj.weight": "model-00045-of-00051.safetensors", "model.layers.78.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", "model.layers.78.self_attn.k_proj.weight": "model-00045-of-00051.safetensors", "model.layers.78.self_attn.o_proj.weight": "model-00045-of-00051.safetensors", "model.layers.78.self_attn.q_proj.weight": "model-00045-of-00051.safetensors", "model.layers.78.self_attn.v_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.input_layernorm.weight": "model-00045-of-00051.safetensors", "model.layers.79.mlp.down_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.mlp.gate_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.mlp.up_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", "model.layers.79.self_attn.k_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.self_attn.o_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.self_attn.q_proj.weight": "model-00045-of-00051.safetensors", "model.layers.79.self_attn.v_proj.weight": "model-00045-of-00051.safetensors", "model.layers.8.input_layernorm.weight": "model-00045-of-00051.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00046-of-00051.safetensors", 
"model.layers.8.mlp.gate_proj.weight": "model-00046-of-00051.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00046-of-00051.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00046-of-00051.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00046-of-00051.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00046-of-00051.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00046-of-00051.safetensors", "model.layers.80.input_layernorm.weight": "model-00046-of-00051.safetensors", "model.layers.80.mlp.down_proj.weight": "model-00046-of-00051.safetensors", "model.layers.80.mlp.gate_proj.weight": "model-00046-of-00051.safetensors", "model.layers.80.mlp.up_proj.weight": "model-00046-of-00051.safetensors", "model.layers.80.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", "model.layers.80.self_attn.k_proj.weight": "model-00046-of-00051.safetensors", "model.layers.80.self_attn.o_proj.weight": "model-00047-of-00051.safetensors", "model.layers.80.self_attn.q_proj.weight": "model-00047-of-00051.safetensors", "model.layers.80.self_attn.v_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.input_layernorm.weight": "model-00047-of-00051.safetensors", "model.layers.81.mlp.down_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.mlp.gate_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.mlp.up_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", "model.layers.81.self_attn.k_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.self_attn.o_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.self_attn.q_proj.weight": "model-00047-of-00051.safetensors", "model.layers.81.self_attn.v_proj.weight": "model-00047-of-00051.safetensors", 
"model.layers.82.input_layernorm.weight": "model-00047-of-00051.safetensors", "model.layers.82.mlp.down_proj.weight": "model-00047-of-00051.safetensors", "model.layers.82.mlp.gate_proj.weight": "model-00047-of-00051.safetensors", "model.layers.82.mlp.up_proj.weight": "model-00048-of-00051.safetensors", "model.layers.82.post_attention_layernorm.weight": "model-00048-of-00051.safetensors", "model.layers.82.self_attn.k_proj.weight": "model-00048-of-00051.safetensors", "model.layers.82.self_attn.o_proj.weight": "model-00048-of-00051.safetensors", "model.layers.82.self_attn.q_proj.weight": "model-00048-of-00051.safetensors", "model.layers.82.self_attn.v_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.input_layernorm.weight": "model-00048-of-00051.safetensors", "model.layers.83.mlp.down_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.mlp.gate_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.mlp.up_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.post_attention_layernorm.weight": "model-00048-of-00051.safetensors", "model.layers.83.self_attn.k_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.self_attn.o_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.self_attn.q_proj.weight": "model-00048-of-00051.safetensors", "model.layers.83.self_attn.v_proj.weight": "model-00048-of-00051.safetensors", "model.layers.84.input_layernorm.weight": "model-00048-of-00051.safetensors", "model.layers.84.mlp.down_proj.weight": "model-00048-of-00051.safetensors", "model.layers.84.mlp.gate_proj.weight": "model-00049-of-00051.safetensors", "model.layers.84.mlp.up_proj.weight": "model-00049-of-00051.safetensors", "model.layers.84.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", "model.layers.84.self_attn.k_proj.weight": "model-00049-of-00051.safetensors", "model.layers.84.self_attn.o_proj.weight": "model-00049-of-00051.safetensors", 
"model.layers.84.self_attn.q_proj.weight": "model-00049-of-00051.safetensors", "model.layers.84.self_attn.v_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.input_layernorm.weight": "model-00049-of-00051.safetensors", "model.layers.85.mlp.down_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.mlp.gate_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.mlp.up_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", "model.layers.85.self_attn.k_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.self_attn.o_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.self_attn.q_proj.weight": "model-00049-of-00051.safetensors", "model.layers.85.self_attn.v_proj.weight": "model-00049-of-00051.safetensors", "model.layers.86.input_layernorm.weight": "model-00049-of-00051.safetensors", "model.layers.86.mlp.down_proj.weight": "model-00050-of-00051.safetensors", "model.layers.86.mlp.gate_proj.weight": "model-00050-of-00051.safetensors", "model.layers.86.mlp.up_proj.weight": "model-00050-of-00051.safetensors", "model.layers.86.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", "model.layers.86.self_attn.k_proj.weight": "model-00050-of-00051.safetensors", "model.layers.86.self_attn.o_proj.weight": "model-00050-of-00051.safetensors", "model.layers.86.self_attn.q_proj.weight": "model-00050-of-00051.safetensors", "model.layers.86.self_attn.v_proj.weight": "model-00050-of-00051.safetensors", "model.layers.87.input_layernorm.weight": "model-00050-of-00051.safetensors", "model.layers.87.mlp.down_proj.weight": "model-00050-of-00051.safetensors", "model.layers.87.mlp.gate_proj.weight": "model-00050-of-00051.safetensors", "model.layers.87.mlp.up_proj.weight": "model-00050-of-00051.safetensors", "model.layers.87.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", 
"model.layers.87.self_attn.k_proj.weight": "model-00050-of-00051.safetensors", "model.layers.87.self_attn.o_proj.weight": "model-00051-of-00051.safetensors", "model.layers.87.self_attn.q_proj.weight": "model-00051-of-00051.safetensors", "model.layers.87.self_attn.v_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.input_layernorm.weight": "model-00051-of-00051.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00051-of-00051.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00051-of-00051.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00051-of-00051.safetensors", "model.norm.weight": "model-00051-of-00051.safetensors"}}
|
output-00001-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e5378425ffce7bdc05abb4287acefde8fed9f8dce40a9394e39fa3bf6a8e223
|
3 |
+
size 8536290276
|
output-00002-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04d0b6d20739188eb4cd0599af2fd965a398864f6bd5f482eaab65ab68c42d03
|
3 |
+
size 8531862340
|
output-00003-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c59ec663d3c414b9d4f7a3b21e56a9ae9f21abf9d130be36f63a627d3be3a072
|
3 |
+
size 8463819280
|
output-00004-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:765a481e5c6c93e8dc10997ba11a640ef8b7928d832c94368e15a90e7efcd9b3
|
3 |
+
size 8585704600
|
output-00005-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f41dc07fe3884a44666cfe60df4332966c5563f68efd2250a1a484e274f0de8
|
3 |
+
size 8372135220
|
output-00006-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b60b584854621ed546ea842221d78fd000fd7a2f918a52d0e989889d4b5942d
|
3 |
+
size 8486848300
|
output-00007-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea73e1346ca23e307bc7a7033bcf05a8451086555480ed2ed22530392b397b31
|
3 |
+
size 8429673532
|
output-00008-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a059a7306d38aa13ef29420db70854b473a4d571e5f48d187556ab71ecfeb37c
|
3 |
+
size 8486970432
|
output-00009-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fd60c07de513c19e3f87c21f8930818e4dce130b82f0377ab80abcfbdd0a0a7
|
3 |
+
size 8496609120
|
output-00010-of-00010.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12e5998e18b36b4e08909047c417d71e7ca2b822e9a575ec14a66c9520206814
|
3 |
+
size 852272432
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a
|
3 |
+
size 587562
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|