FuturisticVibes committed about 1 month ago

Commit

ec8d95e

•

1 Parent(s): 71f2015

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +72 -0
added_tokens.json +4 -0
config.json +42 -0
generation_config.json +7 -0
latest +1 -0
model.safetensors.index.json +0 -0
output-00001-of-00009.safetensors +3 -0
output-00002-of-00009.safetensors +3 -0
output-00003-of-00009.safetensors +3 -0
output-00004-of-00009.safetensors +3 -0
output-00005-of-00009.safetensors +3 -0
output-00006-of-00009.safetensors +3 -0
output-00007-of-00009.safetensors +3 -0
output-00008-of-00009.safetensors +3 -0
output-00009-of-00009.safetensors +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +59 -0

README.md ADDED Viewed

	@@ -0,0 +1,72 @@

+---
+license: apache-2.0
+base_model: mistral-community/Mixtral-8x22B-v0.1
+tags:
+- generated_from_trainer
+- axolotl
+model-index:
+- name: out
+  results: []
+datasets:
+- cognitivecomputations/Dolphin-2.9.2
+- cognitivecomputations/SystemChat-2.0
+- teknium/OpenHermes-2.5
+- m-a-p/CodeFeedback-Filtered-Instruction
+- cognitivecomputations/dolphin-coder
+- cognitivecomputations/samantha-data
+- HuggingFaceH4/ultrachat_200k
+- microsoft/orca-math-word-problems-200k
+- abacusai/SystemChat-1.1
+- Locutusque/function-calling-chatml
+- internlm/Agent-FLAN
+language:
+- en
+---
+# Dolphin 2.9.2 Mixtral 8x22b 🐬
+Curated and trained by Eric Hartford, Lucas Atkins, Fernando Fernandes, and Cognitive Computations
+[![Discord](https://img.shields.io/discord/1156064224225808488?logo=Discord&logoColor=%23ffffff&label=Discord&link=https%3A%2F%2Fdiscord.gg%2FtCMkMDDHwm)](https://discord.gg/cognitivecomputations)
+Discord: https://discord.gg/cognitivecomputations
+<img src="https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png" width="600" />
+New in 2.9.2 is SystemChat 2.0 - a dataset designed to teach Dolphin to obey the system prompt, even over a long conversation.
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/z1u6U91tL-H__7JCDbWys.png)
+My appreciation for the sponsors of Dolphin 2.9.2:
+- [Crusoe Cloud](https://crusoe.ai/) - provided an excellent on-demand 8xH100 node
+- [OnDemand](https://on-demand.io/) - provided inference sponsorship, enabling creation of SystemChat
+This model is based on Dolphin-2.9-Mixtral-8x22b, and is Apache-2.0 licensed.
+The base model has 64k context, and fine-tuning was with 16k sequence length.
+It took 1 week on 8xH100 provided by Crusoe Cloud.
+This model was trained with full fine-tuning (FFT) on 50% of the parameters (targeted with [Laser Scanner](https://github.com/cognitivecomputations/laserRMT/blob/main/laser_scanner.py) by Fernando Fernandes, David Golchinfar, Lucas Atkins, and Eric Hartford), using the ChatML prompt template format.
+Example:
+```
+<|im_start|>system
+You are Dolphin, a helpful AI assistant.<|im_end|>
+<|im_start|>user
+{prompt}<|im_end|>
+<|im_start|>assistant
+```
+Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling.
+Dolphin is uncensored. I have filtered the dataset to remove alignment and bias. This makes the model more compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. Please read my blog post about uncensored models. https://erichartford.com/uncensored-models You are responsible for any content you create using this model. Enjoy responsibly.
+Dolphin is licensed under Apache 2.0. I grant permission for any use, including commercial, that is in accordance with the Apache-2.0 license. Dolphin was trained on data generated from GPT4, among other models.
+## Evals
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/SDWV3SvJ8xR1gjl1z0LyO.png)
+## Training

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<|im_end|>": 32000,
+  "<|im_start|>": 32001
+}

config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+    "_name_or_path": "mistralai/Mixtral-8x22B-v0.1",
+    "architectures": [
+        "MixtralForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 1,
+    "eos_token_id": 32000,
+    "hidden_act": "silu",
+    "hidden_size": 6144,
+    "initializer_range": 0.02,
+    "intermediate_size": 16384,
+    "max_position_embeddings": 65536,
+    "model_type": "mixtral",
+    "num_attention_heads": 48,
+    "num_experts_per_tok": 2,
+    "num_hidden_layers": 56,
+    "num_key_value_heads": 8,
+    "num_local_experts": 8,
+    "output_router_logits": false,
+    "rms_norm_eps": 1e-05,
+    "rope_theta": 1000000,
+    "router_aux_loss_coef": 0.001,
+    "router_jitter_noise": 0.0,
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.40.2",
+    "use_cache": false,
+    "vocab_size": 32002,
+    "quantization_config": {
+        "quant_method": "exl2",
+        "version": "0.1.5",
+        "bits": 4.0,
+        "head_bits": 8,
+        "calibration": {
+            "rows": 100,
+            "length": 2048,
+            "dataset": "(default)"
+        }
+    }
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "do_sample": true,
+  "eos_token_id": 2,
+  "transformers_version": "4.40.2"
+}

latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ global_step1442

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

output-00001-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:915093eea880898eba4bbf7765c14a700b1e33e05d4c210ff52b6cf6ce60cea2
+size 8590112176

output-00002-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46f49e396ae2a5f4cc1ea84167ea00b0c07fc868732332199107eb4ad8d1b419
+size 8562191824

output-00003-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:384d93709d6ad9f1ce3367764b4c8f90ca9feedc435383ec3fb41d19c81ddb80
+size 8572387368

output-00004-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:346d5eaf88aef48ff42407789c8fb1fd29c56f0b93c34799937af782cf4b39e9
+size 8589911072

output-00005-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:066a0aca9015c17bc26ea76b173acceb9ae1fbc9c958ef888d4609b8eb6e421c
+size 8542506432

output-00006-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e06585eff3d95e9a2fe798e2afe54992d0716bc6ed871ca92c7bd82df083ccd2
+size 8543848168

output-00007-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75d339471b960583b116ace8fc6e02c2b21d0aa05ee999214c24d688d01500cf
+size 8590019024

output-00008-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f351c8f402b733e867819d5d0f325a1d7ba8b20590d9121ae7b511a3c7b325b
+size 8538148656

output-00009-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:544f1899dbbd32cc006995c9a3eae543ba3d8d478fce6a3ef29bf54c182ad2d6
+size 2170868928

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}