CorticalStack committed
Commit 5ee0a55
Parent: 7612ffe

Upload folder using huggingface_hub

README.md CHANGED
@@ -3,20 +3,16 @@ license: apache-2.0
 tags:
 - merge
 - mergekit
-- mlabonne/NeuralMonarch-7B
 - mlabonne/AlphaMonarch-7B
-- bardsai/jaskier-7b-dpo-v5.6
+- mlabonne/NeuralMonarch-7B
 - macadeliccc/MBX-7B-v3-DPO
 ---
 
-<img src="pastiche-crown-clown.png" alt="Pastiche crown clown logo" width="800" style="margin-left:'auto' margin-right:'auto' display:'block'"/>
-
 # pastiche-crown-clown-7B-dare
 
 pastiche-crown-clown-7B-dare is a DARE merge of the following models using [mergekit](https://github.com/cg123/mergekit):
-* [mlabonne/NeuralMonarch-7B](https://huggingface.co/mlabonne/NeuralMonarch-7B)
 * [mlabonne/AlphaMonarch-7B](https://huggingface.co/mlabonne/AlphaMonarch-7B)
-* [bardsai/jaskier-7b-dpo-v5.6](https://huggingface.co/bardsai/jaskier-7b-dpo-v5.6)
+* [mlabonne/NeuralMonarch-7B](https://huggingface.co/mlabonne/NeuralMonarch-7B)
 * [macadeliccc/MBX-7B-v3-DPO](https://huggingface.co/macadeliccc/MBX-7B-v3-DPO)
 
 See the paper [Language Models are Super Mario: Absorbing Abilities from Homologous Models as a Free Lunch](https://arxiv.org/abs/2311.03099) for more on the method.
@@ -25,22 +21,22 @@ See the paper [Language Models are Super Mario: Absorbing Abilities from Homolog
 
 ```yaml
 models:
-  - model: mlabonne/NeuralMonarch-7B
+  - model: bardsai/jaskier-7b-dpo-v5.6
     # No parameters necessary for base model
   - model: mlabonne/AlphaMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.4
-  - model: bardsai/jaskier-7b-dpo-v5.6
+      weight: 0.2
+  - model: mlabonne/NeuralMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
   - model: macadeliccc/MBX-7B-v3-DPO
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
 merge_method: dare_ties
-base_model: mlabonne/NeuralMonarch-7B
+base_model: bardsai/jaskier-7b-dpo-v5.6
 parameters:
   int8_mask: true
 dtype: bfloat16
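The `density: 0.53` values above are the DARE keep-rate: each fine-tuned model's delta from the base is randomly sparsified and the survivors rescaled so the expected update is unchanged, before the TIES-style sign election combines them. A minimal sketch of that drop-and-rescale step (illustrative Python, not mergekit's internals):

```python
import torch

def dare_delta(base: torch.Tensor, tuned: torch.Tensor, density: float = 0.53) -> torch.Tensor:
    """Drop-And-REscale: keep a random `density` fraction of the task
    delta and rescale survivors by 1/density, preserving its expectation."""
    delta = tuned - base                     # task vector of one fine-tune
    keep = torch.rand_like(delta) < density  # Bernoulli keep-mask (~53% kept)
    return (delta * keep) / density          # sparse, rescaled delta

# dare_ties then resolves sign conflicts across these sparse deltas and
# adds the weighted result onto the base model's parameters.
```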
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "mlabonne/NeuralMonarch-7B",
+  "_name_or_path": "bardsai/jaskier-7b-dpo-v5.6",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -20,7 +20,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.2",
+  "transformers_version": "4.38.1",
   "use_cache": true,
   "vocab_size": 32000
 }
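The config changes only re-point `_name_or_path` at the new base model and bump the transformers version; the Mistral architecture itself is unchanged. Loading the merge in the dtype the config declares (repo id assumed from the commit context):

```python
import torch
from transformers import AutoModelForCausalLM

# Repo id assumed; adjust if the model lives elsewhere
model = AutoModelForCausalLM.from_pretrained(
    "CorticalStack/pastiche-crown-clown-7B-dare",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json
    device_map="auto",           # requires accelerate
)
```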
mergekit_config.yml CHANGED
@@ -1,21 +1,21 @@
 
 models:
-  - model: mlabonne/NeuralMonarch-7B
+  - model: bardsai/jaskier-7b-dpo-v5.6
     # No parameters necessary for base model
   - model: mlabonne/AlphaMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.4
-  - model: bardsai/jaskier-7b-dpo-v5.6
+      weight: 0.2
+  - model: mlabonne/NeuralMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
   - model: macadeliccc/MBX-7B-v3-DPO
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
 merge_method: dare_ties
-base_model: mlabonne/NeuralMonarch-7B
+base_model: bardsai/jaskier-7b-dpo-v5.6
 parameters:
   int8_mask: true
 dtype: bfloat16
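To reproduce the merge from this file, a sketch using mergekit's Python entry point (import paths and options are an assumption about a recent mergekit; the CLI equivalent is `mergekit-yaml mergekit_config.yml <out_dir>`):

```python
import yaml
import torch
from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge  # assumed import path

# Parse the YAML shown above into a validated merge config
with open("mergekit_config.yml", encoding="utf-8") as f:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(f))

run_merge(
    merge_config,
    out_path="./pastiche-crown-clown-7B-dare",
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # merge on GPU when available
        copy_tokenizer=True,             # copy tokenizer files from the base
    ),
)
```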
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1c4e94d59a1b3b2042256762a84e0fb346919d229447c3f6e1ca261a2f7983f
+oid sha256:fb8ec426f311795daecc23354815ed9a2f907cc13a1473561f38b5373f2b1a65
 size 1979773128
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ec9b61762ce02e3979f5fb1b8bb6dde49b21f173bc28b9af9b4c1506a5727fa
+oid sha256:96ab8e2afed31a8e39411680cdff4871574f29b00de41bdf53b3f755041c22ad
 size 1946235640
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1671a773f94d8cfe20254b3b29b2f43615a31ae4767b84198f4ccdf1b9a3d090
+oid sha256:6fdef48e9255da2899e1aaa404e4201ea82a86c29e75b7d367de8674f69db2f0
 size 1973490216
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fe6210dc00fe6d9a51f09001916629435fa8c32905fc97062eade4bdb42797f
+oid sha256:f7609589e5d527d8bdb50ff81d3d674dc03e6fde8e9c099771cf47c9e24bccf9
 size 1979781464
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7676ddc7e0c4cb453ae5a4dbe44ea9d447146314a90190e9b7c71f57052bd12a
+oid sha256:c94725e9ff1f3442c275c4ad893adf58d64d6830a481985abf05c402edb6e528
 size 1946243984
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d91264faa1d17b06508dce9d6a1d2b0dbb6887e7cdc56170f5d7d92d30cd5ab
+oid sha256:00ee3e7b18f5e4f937d3d6000300e472e1eed348d0ae3c1f84bc55e88e0de7e0
 size 1923166040
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:037711c0f067a079adc4540dc9d92cedc0a35ce051bbd08632638ebcd1f2dd4a
+oid sha256:9990a2b916b49644ccd7d460b900cc0f30bc8a2fc5398b5420977a3e1b2513a9
 size 1946243984
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb9f924ccd00a17a2e070fa78273834997dc66f9fa650e31076657abe1d72028
+oid sha256:f9b521c676f57d16eee9357407cf51a1551a4fe7c5a8b926af94ffd0246351d5
 size 788563544
special_tokens_map.json CHANGED
@@ -1,9 +1,4 @@
 {
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
@@ -19,7 +14,7 @@
     "single_word": false
   },
   "pad_token": {
-    "content": "</s>",
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer_config.json CHANGED
@@ -27,23 +27,17 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
+  "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}{{bos_token + message['role'] + '\n' + message['content'] + eos_token + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
-  "model_max_length": 8192,
-  "pad_token": "</s>",
-  "padding_side": "left",
+  "model_max_length": 32768,
+  "pad_token": "<unk>",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
-  "split_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
-  "use_default_system_prompt": true
+  "use_default_system_prompt": false
 }
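Net effect of the two tokenizer-file changes, consistent with the switch to the jaskier base: no extra special tokens, `<unk>` as the pad token, right-side padding, no chat template, and a 32768-token max length. A quick sanity check with transformers (repo id assumed from the commit context):

```python
from transformers import AutoTokenizer

# Repo id assumed; adjust if the model lives elsewhere
tok = AutoTokenizer.from_pretrained("CorticalStack/pastiche-crown-clown-7B-dare")

print(tok.pad_token)                  # expected: <unk>
print(tok.padding_side)               # expected: right
print(tok.model_max_length)           # expected: 32768
print(tok.additional_special_tokens)  # expected: []
```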