AdaptLLM committed on
Commit
1e6eba6
1 Parent(s): 056bdea

AdaptLLM-med-v0

Browse files
config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 0,
6
+ "eos_token_id": 1,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "intermediate_size": 11008,
10
+ "initializer_range": 0.02,
11
+ "max_sequence_length": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_hidden_layers": 32,
15
+ "pad_token_id": 32000,
16
+ "rms_norm_eps": 1e-06,
17
+ "torch_dtype": "float16",
18
+ "transformers_version": "4.27.0.dev0",
19
+ "use_cache": true,
20
+ "vocab_size": 32001
21
+ }
generation_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_from_model_config": true, "bos_token_id": 0, "eos_token_id": 1, "pad_token_id": 32000, "transformers_version": "4.27.0.dev0"}
pytorch_model-00001-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa3fc237bbecb8982e67a92d410e1daa48c720a9d566e1c4c18b3e3890c71bd
3
+ size 809520963
pytorch_model-00002-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24146cba79a478ecea5d900870c75b248aee39afd5995e123cdb31be12a754aa
3
+ size 809520963
pytorch_model-00003-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a4ab801023b1acacb638877c74aed27bd9e12898571fce97af0f264d05f7de
3
+ size 809520963
pytorch_model-00004-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2d6973e059cd01ca44a7a19c87b1a3eb8c1b2f4e82c6dc009abdf9d2f3b755
3
+ size 809520963
pytorch_model-00005-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12b9adb06870196c9c33c6bc53b90faf767d4a34f0eca34910667431c3b1c4c
3
+ size 809520963
pytorch_model-00006-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee4188d6fcd05385e65a5137e00fdff80fb43df7f23e14db1990517a8a881932
3
+ size 809520963
pytorch_model-00007-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0eeb9db5cb455aa3c30ace5182ea655131a10d21e8621efd091771d9dd5ec3ac
3
+ size 809520963
pytorch_model-00008-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84afeffda180a5a5f893178e290c12a69aa86a6b8cf306e7152b8ad68ef8e07a
3
+ size 809520963
pytorch_model-00009-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c48999b6911c0e2531e6c52fd100761bb97c8c903b0e46f0310346878bc099
3
+ size 809520963
pytorch_model-00010-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbc33e2ee3e5399a99d8e6456cb226914df88bc0ca9a70c8dc816fab026421c
3
+ size 809520963
pytorch_model-00011-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a01412018382124a7cef1363771dd3c9e2371b02fd2291c19b82caf8de8b180
3
+ size 809520963
pytorch_model-00012-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:548ba11c335a6b8193ad1a64baf9ac6537af66a971763fff2928e50eac7f59c4
3
+ size 809520963
pytorch_model-00013-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4af705eb4be81c0cc700be69ccc2c30f64f4dca5da97edfd3adfd2164c34f1f
3
+ size 809520963
pytorch_model-00014-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84f6d4d1e17f34524de3fa901642e048ce9434ed8ac62dd0339d6957e7c590dc
3
+ size 809520963
pytorch_model-00015-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b051151d1a160c0e5a506bbeb499f2596f8a43a9c78d83bbdf2628b7a2dcf41
3
+ size 809520963
pytorch_model-00016-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79c0d5f3bf78378cf0fee2d31277b7658cf7691a992c3079435003db7e795735
3
+ size 809520963
pytorch_model-00017-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d8a2dd3e8a495f2f154bfa2388f6ebc52de27dfcbd075c264846f02db98c6ce
3
+ size 809520963
pytorch_model-00018-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d1792e4ffe493d73f204b26d8e95c96a91cbcca081cc7c8d7702c16b3f8988
3
+ size 809520963
pytorch_model-00019-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5df54ebad10b28bd0d865e214e8c1b6a860fdf1c144cbff635604b7e0e535420
3
+ size 809520963
pytorch_model-00020-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1aa4c2f6c0fcc651af7e0a1328e3813d8f6fc6e3ad01fb68afee8d357e6d6e
3
+ size 809520963
pytorch_model-00021-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0690ce2c8e0fb80e267c7964adfbb14ea60fc38a9b75ad1e575658160bb5af50
3
+ size 809520963
pytorch_model-00022-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a9a2286a812fd3dae3138c616aa8f340725a6d2b4e0ea4097d18dc1464af561
3
+ size 809520963
pytorch_model-00023-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1104557819a42e487d5ee4051339d28f80aacc518644ecbdee8abb4a7d01d328
3
+ size 809520963
pytorch_model-00024-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:041135a90bf937d0d3c00dd536160569e53f921d8d941e981c531b21bdad47a1
3
+ size 809520963
pytorch_model-00025-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f6674f85d535eb2d80d370c991fbcdd1ae8fe8a79947a16d898aba1cc0c1a0
3
+ size 809520963
pytorch_model-00026-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb846aedb10220d05979eee91be214cde6d6c59441be65edf90a7036827ba9b
3
+ size 809520963
pytorch_model-00027-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab91db96ed9bb22fe4afbc76750608d71eec6a1ec28d65e562dc5e82f058cf9f
3
+ size 809520963
pytorch_model-00028-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ec4a1561691311072407da847d933e7ac826f7b7d471b7384d876aa3dd5d6cb
3
+ size 809520963
pytorch_model-00029-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f769ff6cca6fc7892e6c78e5938694f1908346800bb724bb338bb54020b95a07
3
+ size 809520963
pytorch_model-00030-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157fd26afac79204fcf0964270b4bc54d2a0655a9b008eeede2c85b5caa6f703
3
+ size 809520963
pytorch_model-00031-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c94c0f7d851aa4b3479459cbf760cb7b1740508d422d0f4ce0efec9361729b4
3
+ size 809520963
pytorch_model-00032-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc45d3e8518655ed9b78dfe5766dd5ace7048f8959621b4e5c51e609bd772570
3
+ size 809520963
pytorch_model-00033-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe79967f185823af08d57640c33cb55464d86895b514435cc2ebaf61d73a9d1
3
+ size 524314060
pytorch_model.bin.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"weight_map": {"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00033.bin", "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.down_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.up_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00033.bin", "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00003-of-00033.bin", 
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.up_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.input_layernorm.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00033.bin", "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.down_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.up_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.input_layernorm.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00033.bin", "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.down_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.up_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.input_layernorm.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00033.bin", 
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.down_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.up_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.input_layernorm.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00033.bin", "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.down_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.up_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.input_layernorm.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00033.bin", "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.down_proj.weight": 
"pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.up_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.input_layernorm.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00033.bin", "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.down_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.up_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.input_layernorm.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00033.bin", "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.down_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.up_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.input_layernorm.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00033.bin", "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00011-of-00033.bin", 
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.down_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.up_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.input_layernorm.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00033.bin", "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.down_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.up_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.input_layernorm.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00033.bin", "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.down_proj.weight": "pytorch_model-00013-of-00033.bin", 
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.input_layernorm.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00033.bin", "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.down_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.up_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.input_layernorm.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00033.bin", "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.down_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.up_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.input_layernorm.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00033.bin", "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00016-of-00033.bin", 
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.down_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.up_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.input_layernorm.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00033.bin", "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.down_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.up_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.input_layernorm.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00033.bin", "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.down_proj.weight": "pytorch_model-00018-of-00033.bin", 
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.input_layernorm.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00033.bin", "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.down_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.up_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.input_layernorm.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00033.bin", "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.down_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.up_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.input_layernorm.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00033.bin", "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00021-of-00033.bin", 
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.down_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.up_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.input_layernorm.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00033.bin", "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.down_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.up_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.input_layernorm.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00033.bin", "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.down_proj.weight": "pytorch_model-00023-of-00033.bin", 
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.input_layernorm.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00033.bin", "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.down_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.up_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.input_layernorm.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00033.bin", "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.down_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.up_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.input_layernorm.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00033.bin", "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00026-of-00033.bin", 
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.down_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.up_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.input_layernorm.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00033.bin", "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.down_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.up_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.input_layernorm.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00033.bin", "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.down_proj.weight": "pytorch_model-00028-of-00033.bin", 
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.input_layernorm.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00028-of-00033.bin", "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.down_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.up_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.input_layernorm.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00029-of-00033.bin", "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.down_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.up_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.input_layernorm.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00030-of-00033.bin", "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00031-of-00033.bin", 
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.down_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.up_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.input_layernorm.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00031-of-00033.bin", "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.down_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.up_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.input_layernorm.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00032-of-00033.bin", "model.embed_tokens.weight": "pytorch_model-00033-of-00033.bin", "model.norm.weight": "pytorch_model-00033-of-00033.bin", "lm_head.weight": "pytorch_model-00033-of-00033.bin"}, "metadata": {"total_size": 13476835328}}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa299a0662fc3bf7ada4d816b1cb9fdeb472e9edf6c2ffbc7f00e1b5ff5ff968
3
+ size 499739
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "model_max_length": 2048,
5
+ "tokenizer_class": "LlamaTokenizer",
6
+ "unk_token": "<unk>",
7
+ "pad_token": "<pad>"
8
+ }