Upload 39 files
Browse files- config.json +35 -0
- mergekit_moe_config.yml +0 -0
- merges.txt +0 -0
- model-00001-of-00031.safetensors +3 -0
- model-00002-of-00031.safetensors +3 -0
- model-00003-of-00031.safetensors +3 -0
- model-00004-of-00031.safetensors +3 -0
- model-00005-of-00031.safetensors +3 -0
- model-00006-of-00031.safetensors +3 -0
- model-00007-of-00031.safetensors +3 -0
- model-00008-of-00031.safetensors +3 -0
- model-00009-of-00031.safetensors +3 -0
- model-00010-of-00031.safetensors +3 -0
- model-00011-of-00031.safetensors +3 -0
- model-00012-of-00031.safetensors +3 -0
- model-00013-of-00031.safetensors +3 -0
- model-00014-of-00031.safetensors +3 -0
- model-00015-of-00031.safetensors +3 -0
- model-00016-of-00031.safetensors +3 -0
- model-00017-of-00031.safetensors +3 -0
- model-00018-of-00031.safetensors +3 -0
- model-00019-of-00031.safetensors +3 -0
- model-00020-of-00031.safetensors +3 -0
- model-00021-of-00031.safetensors +3 -0
- model-00022-of-00031.safetensors +3 -0
- model-00023-of-00031.safetensors +3 -0
- model-00024-of-00031.safetensors +3 -0
- model-00025-of-00031.safetensors +3 -0
- model-00026-of-00031.safetensors +3 -0
- model-00027-of-00031.safetensors +3 -0
- model-00028-of-00031.safetensors +3 -0
- model-00029-of-00031.safetensors +3 -0
- model-00030-of-00031.safetensors +3 -0
- model-00031-of-00031.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer_config.json +38 -0
- vocab.json +0 -0
config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ahxt/LiteLlama-460M-1T",
|
3 |
+
"architectures": [
|
4 |
+
"MixtralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 4096,
|
14 |
+
"layernorm_epsilon": 1e-05,
|
15 |
+
"max_position_embeddings": 1024,
|
16 |
+
"model_type": "mixtral",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_experts_per_tok": 2,
|
19 |
+
"num_hidden_layers": 24,
|
20 |
+
"num_key_value_heads": 2,
|
21 |
+
"num_local_experts": 512,
|
22 |
+
"output_router_logits": false,
|
23 |
+
"pad_token_id": 0,
|
24 |
+
"pretraining_tp": 1,
|
25 |
+
"rms_norm_eps": 1e-06,
|
26 |
+
"rope_scaling": null,
|
27 |
+
"rope_theta": 10000.0,
|
28 |
+
"router_aux_loss_coef": 0.001,
|
29 |
+
"sliding_window": null,
|
30 |
+
"tie_word_embeddings": false,
|
31 |
+
"torch_dtype": "bfloat16",
|
32 |
+
"transformers_version": "4.37.0.dev0",
|
33 |
+
"use_cache": true,
|
34 |
+
"vocab_size": 50304
|
35 |
+
}
|
mergekit_moe_config.yml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model-00001-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc4c30f0b734a31a16e6616ebf369cbbeec64c833ee70932105a34f772b42544
|
3 |
+
size 9995758448
|
model-00002-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac9d2b0c074a47649cde03c0bb0617f27755dc7527682d1d38edff2a46914292
|
3 |
+
size 9999377632
|
model-00003-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:621f19863c607f829228a050dffde7a5fb76d3fba923c33301d4463bc0ff7dab
|
3 |
+
size 9999377632
|
model-00004-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d23f36428900f02900dc2531ea6a1806c6237e5b42bcbd94383eb7ebb831b7e
|
3 |
+
size 9999377520
|
model-00005-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0f2781bf1ca5cc69d9db77875372710b79954340771b34a4ea87d5fe80123b3
|
3 |
+
size 9999378448
|
model-00006-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e767cc5f6c7931acad370c67bc9615a04c15daaf4be8f3c110e2736ff50bec39
|
3 |
+
size 9999378824
|
model-00007-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b08485f9c33ca372d07d3a75a905b2d60ed2aa2b7954ebfedf74e7d38802f96b
|
3 |
+
size 9999378712
|
model-00008-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d243111db7f644fd38d65d3ef4db486f418d9be7447971c36cb1d18f5946fd2a
|
3 |
+
size 9999378824
|
model-00009-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f80b51ce01743bdf48fe3eb2b172eeadcf47964e9256b193e95affae4a689496
|
3 |
+
size 9999378824
|
model-00010-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9441fc462dfc0b982ef2a8ad15b2945dfa5ef27c63c03444344217463880116
|
3 |
+
size 9999378712
|
model-00011-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d74e0b6fb4b39fac8673260f8eada1deae399e1d6c382c1b6be2498a9495d504
|
3 |
+
size 9999378024
|
model-00012-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11c4153f4d3975eb66c031e610736dd98f56a3a0c40ee84c9cfe6a8580319cb9
|
3 |
+
size 9999377632
|
model-00013-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d85f2a65a3d3a483d7794217b9ca3d792c7e284aa4a642b0edb9cf627ee897e
|
3 |
+
size 9999377520
|
model-00014-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02eab7ad832eaa569604240fa713dc7105d4831a2068361b0348362cde858dc2
|
3 |
+
size 9999377632
|
model-00015-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:733210ab9a820acf5053d94968feb6802a8fd5ea3be334e893472d88cf704c24
|
3 |
+
size 9999378080
|
model-00016-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:553bec73fb656c07a51141d5eeffe7aeb7a9b04e85b6f6c1cb8b511b51a1495a
|
3 |
+
size 9999378712
|
model-00017-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c92de3c49c5a766c55c76854f5001a000542f706c97dc67dffa516329c185f25
|
3 |
+
size 9999378824
|
model-00018-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c0abcd986520a648aad7c025e9cb3ff495bea461b74207ad80424b4162ee2d2
|
3 |
+
size 9999378824
|
model-00019-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc94d9c543391f9b978df723c220124b1a3252ed72def81c2bbaad26339b302d
|
3 |
+
size 9999378720
|
model-00020-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f06a4f03ef54dbaf51503fa48364f02c2842220c60dab5483e3da3d8d1753e1d
|
3 |
+
size 9999378816
|
model-00021-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fe40221f49d4af66c1f226f3e86946420c69f2b420b5ac28733d4bb2062268f
|
3 |
+
size 9999378392
|
model-00022-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a85ced77657b19c1ce76d29b807049f7491176ebfea7b06fa07ea90ecbf1b5ae
|
3 |
+
size 9999377536
|
model-00023-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a768c4fdaa149b06647017f5736034f4baaedd269df72572f75825afd7eca6f
|
3 |
+
size 9999377616
|
model-00024-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af738a2ee159b59b03e0b87e4e2034a7e176cdf4780c4eac7e695484602fa8bf
|
3 |
+
size 9999377632
|
model-00025-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:132304a4571f098282b4916e3b8a13a583ca7c0a5328ad77be6cb273be19a2d7
|
3 |
+
size 9999377624
|
model-00026-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c907e90fda69f7bf3d44a30854329d4cfde2bff26ab2982e0ee75310fb1207d
|
3 |
+
size 9999378800
|
model-00027-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9d32ae78add0604040a0a605d98c40abbb2aaa8797e1b4a9df4dfcb529fe0ae
|
3 |
+
size 9999378824
|
model-00028-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a74d281b33598b9cd88b4b57ab81ba6e854edd7c3216ac28a82e9e9686bdae9
|
3 |
+
size 9999378744
|
model-00029-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59de471593fa46ab3ec41344988ea3a4f04ffa1c75066222a7db0e3d6e201f4b
|
3 |
+
size 9999378792
|
model-00030-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fd7985d27d636866980dfb6dcdde51dd5d93f65783834862db92e915364f085
|
3 |
+
size 9999378824
|
model-00031-of-00031.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05870ee02ab60261e71c8864dcc8fc65497ba11994a831d7ed8219453fdb50c1
|
3 |
+
size 9609366048
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "#",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": true,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"1": {
|
6 |
+
"content": "\"",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"2": {
|
14 |
+
"content": "#",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"50256": {
|
22 |
+
"content": "<|endoftext|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"bos_token": "<|endoftext|>",
|
31 |
+
"clean_up_tokenization_spaces": true,
|
32 |
+
"eos_token": "#",
|
33 |
+
"errors": "replace",
|
34 |
+
"model_max_length": 1024,
|
35 |
+
"pad_token": "<|endoftext|>",
|
36 |
+
"tokenizer_class": "GPT2Tokenizer",
|
37 |
+
"unk_token": "<|endoftext|>"
|
38 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|