improved slerp merge

Browse files

Files changed (14)

README.md +1 -3
config.json +9 -9
mergekit_config.yml +1 -1
model-00001-of-00008.safetensors +1 -1
model-00002-of-00008.safetensors +1 -1
model-00003-of-00008.safetensors +1 -1
model-00004-of-00008.safetensors +1 -1
model-00005-of-00008.safetensors +1 -1
model-00006-of-00008.safetensors +1 -1
model-00007-of-00008.safetensors +1 -1
model-00008-of-00008.safetensors +1 -1
special_tokens_map.json +10 -3
tokenizer.json +0 -0
tokenizer_config.json +9 -6

README.md CHANGED Viewed

@@ -1,6 +1,5 @@
 ---
 base_model:
-- psmathur/orca_mini_v3_13b
 - jondurbin/bagel-dpo-34b-v0.2
 - one-man-army/UNA-34Beagles-32K-bf16-v1
 tags:
@@ -20,7 +19,6 @@ This model was merged using the SLERP merge method.
 ### Models Merged
 The following models were included in the merge:
-* [psmathur/orca_mini_v3_13b](https://huggingface.co/psmathur/orca_mini_v3_13b)
 * [jondurbin/bagel-dpo-34b-v0.2](https://huggingface.co/jondurbin/bagel-dpo-34b-v0.2)
 * [one-man-army/UNA-34Beagles-32K-bf16-v1](https://huggingface.co/one-man-army/UNA-34Beagles-32K-bf16-v1)
@@ -29,7 +27,7 @@ The following models were included in the merge:
 The following YAML configuration was used to produce this model:
 ```yaml
-base_model: psmathur/orca_mini_v3_13b
 dtype: float16
 merge_method: slerp
 parameters:

 ---
 base_model:
 - jondurbin/bagel-dpo-34b-v0.2
 - one-man-army/UNA-34Beagles-32K-bf16-v1
 tags:
 ### Models Merged
 The following models were included in the merge:
 * [jondurbin/bagel-dpo-34b-v0.2](https://huggingface.co/jondurbin/bagel-dpo-34b-v0.2)
 * [one-man-army/UNA-34Beagles-32K-bf16-v1](https://huggingface.co/one-man-army/UNA-34Beagles-32K-bf16-v1)
 The following YAML configuration was used to produce this model:
 ```yaml
+base_model: jondurbin/bagel-dpo-34b-v0.2
 dtype: float16
 merge_method: slerp
 parameters:

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "psmathur/orca_mini_v3_13b",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -8,22 +8,22 @@
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
-  "hidden_size": 5120,
   "initializer_range": 0.02,
-  "intermediate_size": 13824,
-  "max_position_embeddings": 4096,
   "model_type": "llama",
-  "num_attention_heads": 40,
   "num_hidden_layers": 60,
-  "num_key_value_heads": 40,
   "pad_token_id": 0,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
-  "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "transformers_version": "4.36.2",
-  "use_cache": true,
-  "vocab_size": 32000
 }

 {
+  "_name_or_path": "jondurbin/bagel-dpo-34b-v0.2",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
+  "hidden_size": 7168,
   "initializer_range": 0.02,
+  "intermediate_size": 20480,
+  "max_position_embeddings": 200000,
   "model_type": "llama",
+  "num_attention_heads": 56,
   "num_hidden_layers": 60,
+  "num_key_value_heads": 8,
   "pad_token_id": 0,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
+  "rope_theta": 5000000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "transformers_version": "4.36.2",
+  "use_cache": false,
+  "vocab_size": 64000
 }

mergekit_config.yml CHANGED Viewed

@@ -1,4 +1,4 @@
-base_model: psmathur/orca_mini_v3_13b
 dtype: float16
 merge_method: slerp
 parameters:

+base_model: jondurbin/bagel-dpo-34b-v0.2
 dtype: float16
 merge_method: slerp
 parameters:

model-00001-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:640be106b55cb78795c71b3e1328b471d07b2740808ac119aaa077f22f560234
 size 9843206232

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f4c6ab28e0d145a29d6466db086109a11aaf102751ab8aaaca7a1da3e9ff0a6
 size 9843206232

model-00002-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6f2d217b1a9839010563ff74f285099c9b367424a90b2b1aea366eb462e176e
 size 9747829808

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba00e6a8f51e1037e8c82c74e4c8acfec36199a43a2d944ec5bf44b2ace58616
 size 9747829808

model-00003-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ce6097507dc4e389d51aaea624a1841786f6572e9664f695772df8e2767fe2d
 size 9747829832

 version https://git-lfs.github.com/spec/v1
+oid sha256:54e4c33a41ddf2b57a3d39ee648cf96b9b0edf8fa58249bf88af1459cac3f320
 size 9747829832

model-00004-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e41c37dbc5afe019199dae3efb5b7242c21071e1e78402e4d107e6f84bea980
 size 9806549736

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7b957b23039dcf541a19cb4c6ab14f86bb308bd271556d0cc00fb1ddaec70cc
 size 9806549736

model-00005-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfdc0766c3adc7277b1cb8d36cb04b131eaca13da3f812676206c93f5813adb0
 size 9747829824

 version https://git-lfs.github.com/spec/v1
+oid sha256:e23806eb8bca859305a3a0d03b81dff815cb56750ee88114511b9a27a503221e
 size 9747829824

model-00006-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2a9b5e5efdbe430424b0d0eb71f850341fc1333f1a4382f75d780af5ef72cdf
 size 9747800928

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef0147367264724cda46d6c367c4b934059774141ffdbec586d732e9ae20133e
 size 9747800928

model-00007-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e523b01ef4d3aa1ed2de7420171fae1741cb7a7041c503496719d615c05d5ded
 size 9843220744

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d604585188916690f0904f66efbd55598879a1f6151f77e69d4ec909fadfe2b
 size 9843220744

model-00008-of-00008.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce3ea7469ea0ddcc25987c71d946ce2aeb922aef5a5657ebc7d2111fed39e045
 size 293630304

 version https://git-lfs.github.com/spec/v1
+oid sha256:732cb9850f78637c0ea75d6eb4c722aa6ea70866bc83c104d8e3a9f42f25ab2c
 size 293630304

special_tokens_map.json CHANGED Viewed

@@ -2,21 +2,28 @@
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   }

   "bos_token": {
     "content": "<s>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render; see the raw diff.

tokenizer_config.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
-  "add_bos_token": true,
   "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -13,7 +13,7 @@
     "1": {
       "content": "<s>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -21,17 +21,20 @@
     "2": {
       "content": "</s>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "model_max_length": 4096,
-  "pad_token": null,
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",

 {
+  "add_bos_token": false,
   "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     "1": {
       "content": "<s>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     "2": {
       "content": "</s>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<s>",
+  "chat_template": "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 200000,
+  "pad_token": "<unk>",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",