CorticalStack committed
Commit 5ee0a55
Parent: 7612ffe

Upload folder using huggingface_hub

README.md CHANGED
@@ -3,20 +3,16 @@ license: apache-2.0
 tags:
 - merge
 - mergekit
-- mlabonne/NeuralMonarch-7B
 - mlabonne/AlphaMonarch-7B
-- bardsai/jaskier-7b-dpo-v5.6
+- mlabonne/NeuralMonarch-7B
 - macadeliccc/MBX-7B-v3-DPO
 ---
 
-<img src="pastiche-crown-clown.png" alt="Pastiche crown clown logo" width="800" style="margin-left:'auto' margin-right:'auto' display:'block'"/>
-
 # pastiche-crown-clown-7B-dare
 
 pastiche-crown-clown-7B-dare is a DARE merge of the following models using [mergekit](https://github.com/cg123/mergekit):
-* [mlabonne/NeuralMonarch-7B](https://huggingface.co/mlabonne/NeuralMonarch-7B)
 * [mlabonne/AlphaMonarch-7B](https://huggingface.co/mlabonne/AlphaMonarch-7B)
-* [bardsai/jaskier-7b-dpo-v5.6](https://huggingface.co/bardsai/jaskier-7b-dpo-v5.6)
+* [mlabonne/NeuralMonarch-7B](https://huggingface.co/mlabonne/NeuralMonarch-7B)
 * [macadeliccc/MBX-7B-v3-DPO](https://huggingface.co/macadeliccc/MBX-7B-v3-DPO)
 
 See the paper [Language Models are Super Mario: Absorbing Abilities from Homologous Models as a Free Lunch](https://arxiv.org/abs/2311.03099) for more on the method.
@@ -25,22 +21,22 @@ See the paper [Language Models are Super Mario: Absorbing Abilities from Homolog
 
 ```yaml
 models:
-  - model: mlabonne/NeuralMonarch-7B
+  - model: bardsai/jaskier-7b-dpo-v5.6
     # No parameters necessary for base model
   - model: mlabonne/AlphaMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.4
-  - model: bardsai/jaskier-7b-dpo-v5.6
+      weight: 0.2
+  - model: mlabonne/NeuralMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
   - model: macadeliccc/MBX-7B-v3-DPO
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
 merge_method: dare_ties
-base_model: mlabonne/NeuralMonarch-7B
+base_model: bardsai/jaskier-7b-dpo-v5.6
 parameters:
   int8_mask: true
 dtype: bfloat16
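The `density: 0.53` values above are the DARE keep-rate: each fine-tuned model's delta from the base is randomly sparsified and the survivors rescaled so the expected update is unchanged, before the TIES-style sign election combines them. A minimal sketch of that drop-and-rescale step (illustrative Python, not mergekit's internals):

```python
import torch

def dare_delta(base: torch.Tensor, tuned: torch.Tensor, density: float = 0.53) -> torch.Tensor:
    """Drop-And-REscale: keep a random `density` fraction of the task
    delta and rescale survivors by 1/density, preserving its expectation."""
    delta = tuned - base                     # task vector of one fine-tune
    keep = torch.rand_like(delta) < density  # Bernoulli keep-mask (~53% kept)
    return (delta * keep) / density          # sparse, rescaled delta

# dare_ties then resolves sign conflicts across these sparse deltas and
# adds the weighted result onto the base model's parameters.
```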
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "mlabonne/NeuralMonarch-7B",
+  "_name_or_path": "bardsai/jaskier-7b-dpo-v5.6",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -20,7 +20,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.2",
+  "transformers_version": "4.38.1",
   "use_cache": true,
   "vocab_size": 32000
 }
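The config changes only re-point `_name_or_path` at the new base model and bump the transformers version; the Mistral architecture itself is unchanged. Loading the merge in the dtype the config declares (repo id assumed from the commit context):

```python
import torch
from transformers import AutoModelForCausalLM

# Repo id assumed; adjust if the model lives elsewhere
model = AutoModelForCausalLM.from_pretrained(
    "CorticalStack/pastiche-crown-clown-7B-dare",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json
    device_map="auto",           # requires accelerate
)
```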
mergekit_config.yml CHANGED
@@ -1,21 +1,21 @@
 
 models:
-  - model: mlabonne/NeuralMonarch-7B
+  - model: bardsai/jaskier-7b-dpo-v5.6
     # No parameters necessary for base model
   - model: mlabonne/AlphaMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.4
-  - model: bardsai/jaskier-7b-dpo-v5.6
+      weight: 0.2
+  - model: mlabonne/NeuralMonarch-7B
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
   - model: macadeliccc/MBX-7B-v3-DPO
     parameters:
       density: 0.53
-      weight: 0.3
+      weight: 0.4
 merge_method: dare_ties
-base_model: mlabonne/NeuralMonarch-7B
+base_model: bardsai/jaskier-7b-dpo-v5.6
 parameters:
   int8_mask: true
 dtype: bfloat16
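To reproduce the merge from this file, a sketch using mergekit's Python entry point (import paths and options are an assumption about a recent mergekit; the CLI equivalent is `mergekit-yaml mergekit_config.yml <out_dir>`):

```python
import yaml
import torch
from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge  # assumed import path

# Parse the YAML shown above into a validated merge config
with open("mergekit_config.yml", encoding="utf-8") as f:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(f))

run_merge(
    merge_config,
    out_path="./pastiche-crown-clown-7B-dare",
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # merge on GPU when available
        copy_tokenizer=True,             # copy tokenizer files from the base
    ),
)
```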
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1c4e94d59a1b3b2042256762a84e0fb346919d229447c3f6e1ca261a2f7983f
+oid sha256:fb8ec426f311795daecc23354815ed9a2f907cc13a1473561f38b5373f2b1a65
 size 1979773128
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ec9b61762ce02e3979f5fb1b8bb6dde49b21f173bc28b9af9b4c1506a5727fa
+oid sha256:96ab8e2afed31a8e39411680cdff4871574f29b00de41bdf53b3f755041c22ad
 size 1946235640
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1671a773f94d8cfe20254b3b29b2f43615a31ae4767b84198f4ccdf1b9a3d090
+oid sha256:6fdef48e9255da2899e1aaa404e4201ea82a86c29e75b7d367de8674f69db2f0
 size 1973490216
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fe6210dc00fe6d9a51f09001916629435fa8c32905fc97062eade4bdb42797f
+oid sha256:f7609589e5d527d8bdb50ff81d3d674dc03e6fde8e9c099771cf47c9e24bccf9
 size 1979781464
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7676ddc7e0c4cb453ae5a4dbe44ea9d447146314a90190e9b7c71f57052bd12a
+oid sha256:c94725e9ff1f3442c275c4ad893adf58d64d6830a481985abf05c402edb6e528
 size 1946243984
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d91264faa1d17b06508dce9d6a1d2b0dbb6887e7cdc56170f5d7d92d30cd5ab
+oid sha256:00ee3e7b18f5e4f937d3d6000300e472e1eed348d0ae3c1f84bc55e88e0de7e0
 size 1923166040
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:037711c0f067a079adc4540dc9d92cedc0a35ce051bbd08632638ebcd1f2dd4a
+oid sha256:9990a2b916b49644ccd7d460b900cc0f30bc8a2fc5398b5420977a3e1b2513a9
 size 1946243984
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb9f924ccd00a17a2e070fa78273834997dc66f9fa650e31076657abe1d72028
+oid sha256:f9b521c676f57d16eee9357407cf51a1551a4fe7c5a8b926af94ffd0246351d5
 size 788563544
special_tokens_map.json CHANGED
@@ -1,9 +1,4 @@
 {
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
@@ -19,7 +14,7 @@
     "single_word": false
   },
   "pad_token": {
-    "content": "</s>",
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer_config.json CHANGED
@@ -27,23 +27,17 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
+  "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}{{bos_token + message['role'] + '\n' + message['content'] + eos_token + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
-  "model_max_length": 8192,
-  "pad_token": "</s>",
-  "padding_side": "left",
+  "model_max_length": 32768,
+  "pad_token": "<unk>",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
-  "split_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
-  "use_default_system_prompt": true
+  "use_default_system_prompt": false
 }
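Net effect of the two tokenizer-file changes, consistent with the switch to the jaskier base: no extra special tokens, `<unk>` as the pad token, right-side padding, no chat template, and a 32768-token max length. A quick sanity check with transformers (repo id assumed from the commit context):

```python
from transformers import AutoTokenizer

# Repo id assumed; adjust if the model lives elsewhere
tok = AutoTokenizer.from_pretrained("CorticalStack/pastiche-crown-clown-7B-dare")

print(tok.pad_token)                  # expected: <unk>
print(tok.padding_side)               # expected: right
print(tok.model_max_length)           # expected: 32768
print(tok.additional_special_tokens)  # expected: []
```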