jtatman
/

TinyMistral-248m-v2.5-4x-Moe

@@ -8,13 +8,13 @@ tags:
 - lazymergekit
 - Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2-Instruct
-- Locutusque/TinyMistral-248M-Instruct
 base_model:
 - Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2-Instruct
-- Locutusque/TinyMistral-248M-Instruct
 ---
 # TinyMistral-248m-v2.5-4x-Moe
@@ -22,21 +22,19 @@ base_model:
 TinyMistral-248m-v2.5-4x-Moe is a Mixture of Experts (MoE) made with the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
 * [Locutusque/TinyMistral-248M-v2.5-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2.5-Instruct)
 * [Locutusque/TinyMistral-248M-v2.5-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2.5-Instruct)
 * [Locutusque/TinyMistral-248M-v2-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2-Instruct)
-* [Locutusque/TinyMistral-248M-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-Instruct)
 ## 🧩 Configuration
 ```yaml
-base_model: Locutusque/TinyMistral-248M-v2.5
 experts:
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
       - "Help me debug this code."
-      - "Optimize this C# script."
-      - "Implement this feature using JavaScript."
-      - "Convert this HTML structure into a more efficient design."
-      - "Assist me with writing a program that"
     negative_prompts:
       - "How do you"
       - "Explain the concept of"
@@ -47,26 +45,27 @@ experts:
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
-      - "How do you"
-      - "Explain the concept of"
-      - "Give an overview of"
-      - "Compare and contrast between"
-      - "Provide information about"
-      - "Help me understand"
-      - "Summarize"
-      - "Make a recommendation on"
-      - "Answer this question"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Implement this feature using JavaScript."
       - "Convert this HTML structure into a more efficient design."
       - "Assist me with writing a program that"
-  - source_model: Locutusque/TinyMistral-248M-v2-Instruct
-    positive_prompts:
       - "How do I incorporate visual elements into my writing?"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
@@ -82,11 +81,16 @@ experts:
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
-  - source_model: Locutusque/TinyMistral-248M-Instruct
     positive_prompts:
       - "Craft me a list of some nice places to visit around the world. "
       - "Write me a story"
       - "Write me an essay"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
@@ -102,7 +106,7 @@ experts:
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
 gate_mode: hidden
 ```

 - lazymergekit
 - Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2.5-Instruct
+- Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2-Instruct
 base_model:
 - Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2.5-Instruct
+- Locutusque/TinyMistral-248M-v2.5-Instruct
 - Locutusque/TinyMistral-248M-v2-Instruct
 ---
 # TinyMistral-248m-v2.5-4x-Moe
 TinyMistral-248m-v2.5-4x-Moe is a Mixture of Experts (MoE) made with the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
 * [Locutusque/TinyMistral-248M-v2.5-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2.5-Instruct)
 * [Locutusque/TinyMistral-248M-v2.5-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2.5-Instruct)
+* [Locutusque/TinyMistral-248M-v2.5-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2.5-Instruct)
 * [Locutusque/TinyMistral-248M-v2-Instruct](https://huggingface.co/Locutusque/TinyMistral-248M-v2-Instruct)
 ## 🧩 Configuration
 ```yaml
+base_model: Locutusque/TinyMistral-248M-v2.5-Instruct
 experts:
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
+      - "Write me a Python program that calculates the factorial of n."
       - "Help me debug this code."
+      - "Optimize this C++ program."
     negative_prompts:
       - "How do you"
       - "Explain the concept of"
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
+      - "Craft me a list of some nice places to visit around the world. "
+      - "Write me a story"
+      - "Write me an essay"
+      - "How do I incorporate visual elements into my writing?"
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
+      - "What is the product of 2 x 5 x 18?"
+      - "How do I guess the value of x for the function f(x) = x^4 - 2x^2 - 1?"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Implement this feature using JavaScript."
       - "Convert this HTML structure into a more efficient design."
       - "Assist me with writing a program that"
+      - "Craft me a list of some nice places to visit around the world. "
+      - "Write me a story"
+      - "Write me an essay"
       - "How do I incorporate visual elements into my writing?"
+  - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
+    positive_prompts:
+      - "How do I incorporate fewer visual elements into my art but retain impact?"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
+      - "Craft me a list of some nice places to visit around the world. "
+      - "Write me a story"
+      - "Write me an essay"
+  - source_model: Locutusque/TinyMistral-248M-v2-Instruct
     positive_prompts:
       - "Craft me a list of some nice places to visit around the world. "
       - "Write me a story"
       - "Write me an essay"
+      - "Create a fantasy story about"
+      - "Tell me about the wild fjords."
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
+      - "How do I incorporate visual elements into my writing?"
 gate_mode: hidden
 ```

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "Locutusque/TinyMistral-248M-v2.5",
   "architectures": [
     "MixtralForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "router_aux_loss_coef": 0.001,
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
   "transformers_version": "4.37.2",
   "use_cache": true,
   "vocab_size": 32005

 {
+  "_name_or_path": "Locutusque/TinyMistral-248M-v2.5-Instruct",
   "architectures": [
     "MixtralForCausalLM"
   ],
   "router_aux_loss_coef": 0.001,
   "sliding_window": null,
   "tie_word_embeddings": false,
+  "torch_dtype": "float32",
   "transformers_version": "4.37.2",
   "use_cache": true,
   "vocab_size": 32005

mergekit_moe_config.yml CHANGED Viewed

@@ -1,13 +1,11 @@
-base_model: Locutusque/TinyMistral-248M-v2.5
 experts:
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
       - "Help me debug this code."
-      - "Optimize this C# script."
-      - "Implement this feature using JavaScript."
-      - "Convert this HTML structure into a more efficient design."
-      - "Assist me with writing a program that"
     negative_prompts:
       - "How do you"
       - "Explain the concept of"
@@ -18,26 +16,27 @@ experts:
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
-      - "How do you"
-      - "Explain the concept of"
-      - "Give an overview of"
-      - "Compare and contrast between"
-      - "Provide information about"
-      - "Help me understand"
-      - "Summarize"
-      - "Make a recommendation on"
-      - "Answer this question"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Implement this feature using JavaScript."
       - "Convert this HTML structure into a more efficient design."
       - "Assist me with writing a program that"
-  - source_model: Locutusque/TinyMistral-248M-v2-Instruct
-    positive_prompts:
       - "How do I incorporate visual elements into my writing?"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
@@ -53,11 +52,16 @@ experts:
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
-  - source_model: Locutusque/TinyMistral-248M-Instruct
     positive_prompts:
       - "Craft me a list of some nice places to visit around the world. "
       - "Write me a story"
       - "Write me an essay"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
@@ -73,5 +77,5 @@ experts:
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
 gate_mode: hidden

+base_model: Locutusque/TinyMistral-248M-v2.5-Instruct
 experts:
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
+      - "Write me a Python program that calculates the factorial of n."
       - "Help me debug this code."
+      - "Optimize this C++ program."
     negative_prompts:
       - "How do you"
       - "Explain the concept of"
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
+      - "Craft me a list of some nice places to visit around the world. "
+      - "Write me a story"
+      - "Write me an essay"
+      - "How do I incorporate visual elements into my writing?"
   - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
     positive_prompts:
+      - "What is the product of 2 x 5 x 18?"
+      - "How do I guess the value of x for the function f(x) = x^4 - 2x^2 - 1?"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Implement this feature using JavaScript."
       - "Convert this HTML structure into a more efficient design."
       - "Assist me with writing a program that"
+      - "Craft me a list of some nice places to visit around the world. "
+      - "Write me a story"
+      - "Write me an essay"
       - "How do I incorporate visual elements into my writing?"
+  - source_model: Locutusque/TinyMistral-248M-v2.5-Instruct
+    positive_prompts:
+      - "How do I incorporate fewer visual elements into my art but retain impact?"
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
+      - "Craft me a list of some nice places to visit around the world. "
+      - "Write me a story"
+      - "Write me an essay"
+  - source_model: Locutusque/TinyMistral-248M-v2-Instruct
     positive_prompts:
       - "Craft me a list of some nice places to visit around the world. "
       - "Write me a story"
       - "Write me an essay"
+      - "Create a fantasy story about"
+      - "Tell me about the wild fjords."
     negative_prompts:
       - "Help me debug this code."
       - "Optimize this C# script."
       - "Summarize"
       - "Make a recommendation on"
       - "Answer this question"
+      - "How do I incorporate visual elements into my writing?"
 gate_mode: hidden

model-00001-of-00001.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09a6143c521ff0c20f1df03d4a92db7e6a97a71e25ad40f9de4092d3c2fe2a48
-size 1402144424

 version https://git-lfs.github.com/spec/v1
+oid sha256:514a8f6bc5248309422ad425e94929e1521d765dd30f65d4cf8dbc841edc28e7
+size 2804260704

tokenizer_config.json CHANGED Viewed

@@ -74,7 +74,10 @@
   "legacy": true,
   "max_length": 1536,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|bos|>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "stride": 0,

   "legacy": true,
   "max_length": 1536,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_to_multiple_of": null,
   "pad_token": "<|bos|>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "stride": 0,