turboderp committed
Commit a79acc2
1 Parent(s): a28dc01
README.md CHANGED
@@ -1,15 +1,120 @@
- EXL2 quants of [Mixtral 8x22B Instruct v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1/tree/main)
-
- More bitrates still cooking
-
- [2.30 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/2.3bpw)
- [2.50 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/2.5bpw)
- [2.70 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/2.7bpw)
- [3.00 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/3.0bpw)
- [3.50 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/3.5bpw)
- [4.00 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/4.0bpw)
- [4.50 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/4.5bpw)
- [5.00 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/5.0bpw)
- [6.00 bits per weight](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/tree/6.0bpw)
-
- [measurement.json](https://huggingface.co/turboderp/Mixtral-8x22B-Instruct-v0.1-exl2/blob/main/measurement.json)
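
Each bitrate above lives on its own branch of this repo, so a specific variant can be fetched by revision. A minimal sketch (not part of this commit, assuming the `huggingface_hub` client is installed):

```python
# Minimal sketch: fetch a single EXL2 variant by branch name.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="turboderp/Mixtral-8x22B-Instruct-v0.1-exl2",
    revision="3.0bpw",  # any branch from the list above
)
print(local_dir)
```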
+ ---
+ license: apache-2.0
+ ---
+
+ # Model Card for Mixtral-8x22B-Instruct-v0.1
+ The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the [Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1).
+
+ ## Run the model
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM
+ from mistral_common.protocol.instruct.messages import UserMessage
+ from mistral_common.protocol.instruct.tool_calls import Function, Tool
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
+ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+
+ device = "cuda"  # the device to load the model onto
+
+ tokenizer_v3 = MistralTokenizer.v3()
+
+ mistral_query = ChatCompletionRequest(
+     tools=[
+         Tool(
+             function=Function(
+                 name="get_current_weather",
+                 description="Get the current weather",
+                 parameters={
+                     "type": "object",
+                     "properties": {
+                         "location": {
+                             "type": "string",
+                             "description": "The city and state, e.g. San Francisco, CA",
+                         },
+                         "format": {
+                             "type": "string",
+                             "enum": ["celsius", "fahrenheit"],
+                             "description": "The temperature unit to use. Infer this from the user's location.",
+                         },
+                     },
+                     "required": ["location", "format"],
+                 },
+             )
+         )
+     ],
+     messages=[
+         UserMessage(content="What's the weather like today in Paris"),
+     ],
+     model="test",
+ )
+
+ encoded = tokenizer_v3.encode_chat_completion(mistral_query).tokens
+ model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
+ model.to(device)
+ model_inputs = torch.tensor([encoded]).to(device)  # batch of one token sequence
+
+ generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
+ sp_tokenizer = tokenizer_v3.instruct_tokenizer.tokenizer
+ decoded = sp_tokenizer.decode(generated_ids[0].tolist())
+ print(decoded)
+ ```
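
The full bf16 checkpoint is far larger than a single GPU. As a sketch (not from the card, assuming the `accelerate` package is installed), the load step can instead let transformers shard the weights across available devices:

```python
# Sketch: shard the bf16 weights across devices instead of one .to(device) move.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires accelerate
)
```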
+
+ # Instruct tokenizer
+ The HuggingFace tokenizer included in this release should match our own. To compare:
+ `pip install mistral-common`
+
+ ```py
+ from mistral_common.protocol.instruct.messages import (
+     AssistantMessage,
+     UserMessage,
+ )
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
+ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+
+ from transformers import AutoTokenizer
+
+ tokenizer_v3 = MistralTokenizer.v3()
+
+ mistral_query = ChatCompletionRequest(
+     messages=[
+         UserMessage(content="How many experts ?"),
+         AssistantMessage(content="8"),
+         UserMessage(content="How big ?"),
+         AssistantMessage(content="22B"),
+         UserMessage(content="Noice 🎉 !"),
+     ],
+     model="test",
+ )
+ hf_messages = mistral_query.model_dump()['messages']
+
+ tokenized_mistral = tokenizer_v3.encode_chat_completion(mistral_query).tokens
+
+ tokenizer_hf = AutoTokenizer.from_pretrained('mistralai/Mixtral-8x22B-Instruct-v0.1')
+ tokenized_hf = tokenizer_hf.apply_chat_template(hf_messages, tokenize=True)
+
+ assert tokenized_hf == tokenized_mistral
+ ```
+
+ # Function calling and special tokens
+ This tokenizer includes additional special tokens related to function calling:
+ - [TOOL_CALLS]
+ - [AVAILABLE_TOOLS]
+ - [/AVAILABLE_TOOLS]
+ - [TOOL_RESULTS]
+ - [/TOOL_RESULTS]
+
+ If you want to use this model with function calling, please be sure to apply the prompt format similarly to what is done in our [SentencePieceTokenizerV3](github.com/mistralai/mistral-common/...).
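
One quick way to see these markers in context, as a sketch (assuming the object returned by `encode_chat_completion` exposes a `text` debug field, as in recent `mistral-common` releases):

```python
# Sketch: inspect the rendered prompt of a request that carries tools.
# Reuses tokenizer_v3 and mistral_query from the "Run the model" example above.
tokenized = tokenizer_v3.encode_chat_completion(mistral_query)
print(tokenized.text)  # the tool schema should sit between [AVAILABLE_TOOLS] and [/AVAILABLE_TOOLS]
```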
+
+ # The Mistral AI Team
+ Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux,
+ Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault,
+ Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot,
+ Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger,
+ Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona,
+ Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon,
+ Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat,
+ Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen,
+ Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao,
+ Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang,
+ Valera Nemychnikova, William El Sayed, William Marshall
config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "architectures": [
+     "MixtralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 6144,
+   "initializer_range": 0.02,
+   "intermediate_size": 16384,
+   "max_position_embeddings": 65536,
+   "model_type": "mixtral",
+   "num_attention_heads": 48,
+   "num_experts_per_tok": 2,
+   "num_hidden_layers": 56,
+   "num_key_value_heads": 8,
+   "num_local_experts": 8,
+   "output_router_logits": false,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000.0,
+   "router_aux_loss_coef": 0.001,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.38.0",
+   "use_cache": true,
+   "vocab_size": 32768,
+   "quantization_config": {
+     "quant_method": "exl2",
+     "version": "0.0.18",
+     "bits": 3.75,
+     "head_bits": 6,
+     "calibration": {
+       "rows": 100,
+       "length": 2048,
+       "dataset": "(default)"
+     }
+   }
+ }
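
The `quantization_config` block records how the EXL2 conversion was made: 3.75 bits per weight on average with a 6-bit head layer, calibrated on 100 rows of 2048 tokens from the default dataset. A back-of-the-envelope check (a sketch; ~141B is Mixtral 8x22B's published total parameter count) ties this to the shard sizes below:

```python
# Rough storage estimate for a 3.75 bpw quant of a ~141B-parameter model.
total_params = 141e9      # approximate total parameters of Mixtral 8x22B
bits_per_weight = 3.75    # "bits" from quantization_config above
size_gb = total_params * bits_per_weight / 8 / 1e9
print(f"~{size_gb:.0f} GB of quantized weights")  # ~66 GB, close to the sum of the 8 shards below
```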
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.34.0.dev0"
+ }
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
output-00001-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b1c2ff2ba5476b3df89330d08098251db24985d08020a613687664e20d9e792d
+ size 8581924752
output-00002-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:161ec218c6a4e079570d9d0de990119f47d26860d816881e345185e09dda4365
+ size 8573208432
output-00003-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a256f8d323069917b9fbca66fb5ce6c3b414e8f05dc3ed4c45286e1af524c50
+ size 8558958648
output-00004-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c539862538fde337435a29f2954043b4bfe466b0453443543451f8adfa732fc
+ size 8553708680
output-00005-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18aeef37fc63451e856d24eb808801d4371361e3383ca769fa845f63bd93f252
+ size 8589855920
output-00006-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd50ac28545134deec452ec84e3234966380f12884c1108de902f1d6c79e1878
+ size 8573248536
output-00007-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74b999904f011d77e9097437a4e0eec2d2a1569fa8c6c3224dc06d1771f2d52c
+ size 8539662824
output-00008-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:978d6eb3161eb053efecd9b4eff2dbec811239bca5ebbd81f3f8588d78b3d8a7
+ size 6296987240
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "unk_token": "<unk>",
+   "b_inst": "[INST]",
+   "e_inst": "[/INST]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "chat_template": "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": null,
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
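
The `chat_template` above is the standard Mistral instruct format. As a sketch (not part of the commit), transformers can render it directly, assuming the tokenizer is loaded from this repo:

```python
# Sketch: render the [INST] chat template above without tokenizing.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("turboderp/Mixtral-8x22B-Instruct-v0.1-exl2")
messages = [
    {"role": "user", "content": "How many experts?"},
    {"role": "assistant", "content": "8"},
    {"role": "user", "content": "How big?"},
]
print(tok.apply_chat_template(messages, tokenize=False))
# Expected: "<s> [INST] How many experts? [/INST] 8 </s> [INST] How big? [/INST]"
```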