Upload folder using huggingface_hub
Browse files
- mergekit_moe_config.yml +4 -65
- model-00005-of-00005.safetensors +1 -1
- special_tokens_map.json +2 -1
- tokenizer_config.json +1 -0
mergekit_moe_config.yml
CHANGED
@@ -5,71 +5,10 @@ dtype: bfloat16
|
|
5 |
experts_per_token: 2
|
6 |
experts:
|
7 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
8 |
-
positive_prompts:
|
9 |
-
- "What are the different"
|
10 |
-
- "what are the distinct"
|
11 |
-
- "Give me the unique"
|
12 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
13 |
-
positive_prompts:
|
14 |
-
- "When"
|
15 |
-
- "when"
|
16 |
-
- "Where"
|
17 |
-
- "where"
|
18 |
-
- "Which"
|
19 |
-
- "which"
|
20 |
-
- "Who"
|
21 |
-
- "who"
|
22 |
-
- "What"
|
23 |
-
- "what"
|
24 |
-
- "Whom"
|
25 |
-
- "whom"
|
26 |
-
- "Whose"
|
27 |
-
- "whose"
|
28 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
29 |
-
positive_prompts:
|
30 |
-
- "Larger"
|
31 |
-
- "larger"
|
32 |
-
- "Smaller"
|
33 |
-
- "smaller"
|
34 |
-
- "Bigger"
|
35 |
-
- "bigger"
|
36 |
-
- "Smallest"
|
37 |
-
- "smallest"
|
38 |
-
- "Largest"
|
39 |
-
- "largest"
|
40 |
-
- "Biggest"
|
41 |
-
- "biggest"
|
42 |
-
- "Most"
|
43 |
-
- "most"
|
44 |
-
- "Least"
|
45 |
-
- "least"
|
46 |
-
- "More"
|
47 |
-
- "more"
|
48 |
-
- "Less"
|
49 |
-
- "less"
|
50 |
-
- "Number"
|
51 |
-
- "number"
|
52 |
-
- "Numbers"
|
53 |
-
- "numbers"
|
54 |
-
- "Quantity"
|
55 |
-
- "quantity"
|
56 |
-
- "At least"
|
57 |
-
- "at least"
|
58 |
-
- "At most"
|
59 |
-
- "at most"
|
60 |
-
- "Greater"
|
61 |
-
- "greater"
|
62 |
-
- "Fewer"
|
63 |
-
- "fewer"
|
64 |
-
- "Than"
|
65 |
-
- "than"
|
66 |
-
- "Equal"
|
67 |
-
- "equal"
|
68 |
-
- "Same"
|
69 |
-
- "same"
|
70 |
-
- "Equal to"
|
71 |
-
- "equal to"
|
72 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
73 |
-
positive_prompts:
|
74 |
-
- "that also"
|
75 |
-
- "who have the same"
|
|
|
5 |
experts_per_token: 2
|
6 |
experts:
|
7 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
8 |
+
positive_prompts: []
|
|
|
|
|
|
|
9 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
10 |
+
positive_prompts: []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
12 |
+
positive_prompts: []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
- source_model: meta-llama/Meta-Llama-3-8B-Instruct
|
14 |
+
positive_prompts: []
|
|
|
|
model-00005-of-00005.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9967007856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8e6de51562fbb8dbda2f705a6af7200cc1272963705af0ed90dd17d36c217e0
|
3 |
size 9967007856
|
special_tokens_map.json
CHANGED
@@ -12,5 +12,6 @@
|
|
12 |
"normalized": false,
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
-
}
|
|
|
16 |
}
|
|
|
12 |
"normalized": false,
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|begin_of_text|>"
|
17 |
}
|
tokenizer_config.json
CHANGED
@@ -2058,5 +2058,6 @@
|
|
2058 |
"attention_mask"
|
2059 |
],
|
2060 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
2061 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2062 |
}
|
|
|
2058 |
"attention_mask"
|
2059 |
],
|
2060 |
"model_max_length": 1000000000000000019884624838656,
|
2061 |
+
"pad_token": "<|begin_of_text|>",
|
2062 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2063 |
}
|