FluffyKaeloky committed
Commit: f0b1d82
1 Parent(s): 7d8e229
Upload 17 files
- README.md +26 -0
- config.json +41 -0
- mergekit_config.yml +88 -0
- model.safetensors.index.json +0 -0
- output-00001-of-00009.safetensors +3 -0
- output-00002-of-00009.safetensors +3 -0
- output-00003-of-00009.safetensors +3 -0
- output-00004-of-00009.safetensors +3 -0
- output-00005-of-00009.safetensors +3 -0
- output-00006-of-00009.safetensors +3 -0
- output-00007-of-00009.safetensors +3 -0
- output-00008-of-00009.safetensors +3 -0
- output-00009-of-00009.safetensors +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +42 -0
README.md
ADDED
@@ -0,0 +1,26 @@
+---
+base_model:
+- jukofyork/Dark-Miqu-70B
+- sophosympatheia/Midnight-Miqu-70B-v1.5
+- jukofyork/Dawn-Miqu-70B
+library_name: transformers
+tags:
+- mergekit
+- merge
+license: other
+---
+Twilight Miqu is a story-writing model composed from sophosympatheia/Midnight-Miqu-70B-v1.5, jukofyork/Dawn-Miqu-70B and jukofyork/Dark-Miqu-70B.
+
+It is an experiment to see if large models are more coherent on story-writing tasks.
+Twilight = Midnight + Dawn + Dark
+
+Please see this model card for details and usage instructions:
+https://huggingface.co/sophosympatheia/Midnight-Miqu-70B-v1.5
+
+This model is based on Miqu, so it is capable of 32K context.
+
+All miqu-derived models, including this merge, are only suitable for personal use. Mistral has been cool about it so far, but you should be aware that by downloading this merge you are assuming whatever legal risk is inherent in acquiring and using a model based on leaked weights. This merge comes with no warranties or guarantees of any kind, but you probably already knew that.
+
+This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
+
+A big thank you to Mistral, sophosympatheia and jukofyork for the original models!
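Note: per the quantization_config in config.json below, this upload is an exl2 quant (3.75 bpw), so it loads with exllamav2 rather than plain transformers. A minimal loading sketch, assuming the exllamav2 and huggingface_hub packages are installed; the repo id is hypothetical and the exllamav2 API can drift between versions:

```python
# Minimal loading sketch for an exl2-quantized model. The repo id below is
# hypothetical; substitute the repo this commit actually belongs to.
from huggingface_hub import snapshot_download
from exllamav2 import ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Config, ExLlamaV2Tokenizer

model_dir = snapshot_download("FluffyKaeloky/Twilight-Miqu-146B-exl2")  # hypothetical id

config = ExLlamaV2Config()
config.model_dir = model_dir
config.prepare()  # reads config.json and locates the output-*.safetensors shards

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)  # allocated while layers stream in
model.load_autosplit(cache)               # split the model across available GPUs

tokenizer = ExLlamaV2Tokenizer(config)
```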
config.json
ADDED
@@ -0,0 +1,41 @@
+{
+  "_name_or_path": "softwareweaver/Twilight-Miqu-146B",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 8192,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 32764,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 170,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
+  "use_cache": true,
+  "vocab_size": 32000,
+  "quantization_config": {
+    "quant_method": "exl2",
+    "version": "0.1.1",
+    "bits": 3.75,
+    "head_bits": 6,
+    "calibration": {
+      "rows": 100,
+      "length": 2048,
+      "dataset": "(default)"
+    }
+  }
+}
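As a sanity check on the "146B" in the model name, the dimensions above pin down the parameter count. A back-of-envelope sketch under the standard Llama layout (64 heads of width 8192/64 = 128, so the 8 KV heads give 1024-wide k/v projections):

```python
# Rough Llama-architecture parameter count from the config above.
hidden, inter, layers, vocab = 8192, 28672, 170, 32000
heads, kv_heads = 64, 8
head_dim = hidden // heads            # 128
kv_width = kv_heads * head_dim        # 1024 (grouped-query attention)

attn = 2 * hidden * hidden + 2 * hidden * kv_width  # q_proj, o_proj + k_proj, v_proj
mlp = 3 * hidden * inter                            # gate_proj, up_proj, down_proj
norms = 2 * hidden                                  # input + post-attention RMSNorm
per_layer = attn + mlp + norms

embed = 2 * vocab * hidden                          # embed_tokens + untied lm_head
total = layers * per_layer + embed + hidden         # + final model norm
print(f"~{total / 1e9:.1f}B parameters")            # -> ~146.0B
```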
mergekit_config.yml
ADDED
@@ -0,0 +1,88 @@
+const_tag: &MODEL1 sophosympatheia/Midnight-Miqu-70B-v1.5
+const_tag: &MODEL3 jukofyork/Dawn-Miqu-70B
+const_tag: &MODEL2 jukofyork/Dark-Miqu-70B
+
+const_tag: &QK_ATTENUATION_FACTOR 0.8408964153 # sqrt(sqrt(1/2))
+const_tag: &MLP_DOWN_SCALE_FACTOR 0.7071067812 # sqrt(1/2)
+
+scale-filter-env: &scale_filter_env
+  parameters:
+    scale:
+      - filter: q_proj
+        value: *QK_ATTENUATION_FACTOR
+      - filter: k_proj
+        value: *QK_ATTENUATION_FACTOR
+      - filter: down_proj
+        value: *MLP_DOWN_SCALE_FACTOR
+      - value: 1.0
+
+slices:
+  - sources:
+      - model: *MODEL1
+        layer_range: [0, 10]
+  - sources:
+      - model: *MODEL1
+        layer_range: [10, 20]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL2
+        layer_range: [10, 20]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL3
+        layer_range: [10, 20]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL3
+        layer_range: [20, 30]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL2
+        layer_range: [20, 30]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL1
+        layer_range: [30, 40]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL2
+        layer_range: [30, 40]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL3
+        layer_range: [40, 50]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL2
+        layer_range: [40, 50]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL1
+        layer_range: [50, 60]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL2
+        layer_range: [50, 60]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL3
+        layer_range: [50, 60]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL1
+        layer_range: [60, 70]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL2
+        layer_range: [60, 70]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL3
+        layer_range: [60, 70]
+    <<: *scale_filter_env
+  - sources:
+      - model: *MODEL1
+        layer_range: [70, 80]
+
+merge_method: passthrough
+dtype: float16
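Two checks worth making on this config. First, the slice list stacks 17 ten-layer blocks, matching num_hidden_layers = 170 in config.json. Second, the scale factors appear chosen so that duplicated layers do not inflate activations: q_proj and k_proj are each scaled by (1/2)^(1/4), shrinking the q·k attention logits by sqrt(1/2), and down_proj is scaled by sqrt(1/2), so if two copies of a layer wrote independent residual updates their variances would sum back to roughly one unscaled layer's worth. A small sketch verifying the arithmetic (the variance argument is a heuristic reading of the config, not a claim from the source):

```python
import math

qk = 0.8408964153    # applied to q_proj and k_proj: (1/2) ** (1/4)
down = 0.7071067812  # applied to down_proj: (1/2) ** (1/2)

# q and k are both scaled, so attention logits (q . k) shrink by qk**2 = sqrt(1/2).
assert math.isclose(qk ** 2, math.sqrt(0.5), rel_tol=1e-9)

# Each duplicate's residual write is scaled by sqrt(1/2); for two independent
# copies the variances add: 2 * down**2 == 1, i.e. one unscaled layer's worth.
assert math.isclose(2 * down ** 2, 1.0, rel_tol=1e-9)
```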
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
output-00001-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d90f8af1becfbebb7c95a8c33139538e4bec9d102d468f39347fe3e303fb12b8
+size 8587267724
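The entry above (and each shard that follows) is a git-lfs pointer file: the real shard is fetched separately and can be checked against the recorded oid and size. A minimal verification sketch, assuming the shard has been downloaded to the working directory:

```python
import hashlib
import os

def verify_lfs_pointer(path: str, oid: str, size: int) -> bool:
    """Check a downloaded shard against its git-lfs pointer metadata."""
    if os.path.getsize(path) != size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
    return h.hexdigest() == oid

# Values from the pointer file above; the local path is hypothetical.
ok = verify_lfs_pointer(
    "output-00001-of-00009.safetensors",
    "d90f8af1becfbebb7c95a8c33139538e4bec9d102d468f39347fe3e303fb12b8",
    8587267724,
)
```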
output-00002-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:451f5f2985b252730c8e1de611ee83f519287814d5f13e045fbb2a8d6abd4937
+size 8580363048
output-00003-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0b58db45c4310a51e89c1b36737413fb025d1857895e490da96c68d3fde0f51
+size 8574498044
output-00004-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c08a55b8bbef71e88a857321ec62cfa85648f1235fdb36bd244a2341ef32a30a
+size 8478075972
output-00005-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76e715679491eac55a8033925398b505e68524e5e01679d8e13103dce209250d
+size 8582377760
output-00006-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ade61e7994208da9b551161fb9d09726a2dca18ef9aab1846f3c398ad1c6e7a
+size 8549690360
output-00007-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d7c728209163e670f056547830f52f2e11c63db38c0e49085f637d113a221d5
+size 8520705388
output-00008-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99f5a627b4fad8159026dc081735b9d371256d85a82d5c059a3efca4afa004ff
+size 8572805704
output-00009-of-00009.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fd4111ea9deac2576a45332f9d3f4172862f4b7fc6c627fad567ba47e9c9b1f
+size 473836674
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
tokenizer_config.json
ADDED
@@ -0,0 +1,42 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
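With add_bos_token true and add_eos_token false, encoded prompts get a leading <s> and no trailing </s>; the enormous model_max_length appears to be the transformers "unbounded" sentinel, so the effective context limit comes from max_position_embeddings (32764) in config.json. A quick sanity check, assuming the files above sit in a local directory (the path is hypothetical):

```python
from transformers import AutoTokenizer

# Path is hypothetical; point it at the directory holding these tokenizer files.
tok = AutoTokenizer.from_pretrained("./Twilight-Miqu-146B")

ids = tok("Once upon a midnight dreary").input_ids
print(ids[0] == tok.bos_token_id)   # True: <s> (id 1) is prepended
print(ids[-1] == tok.eos_token_id)  # False: no </s> is appended
```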