MaziyarPanahi committed on
Commit aca5eb3
1 Parent(s): 9a0c65b

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,130 @@
---
license: apache-2.0
language:
- en
- es
- it
- de
- fr
---

# Model Card for Mixtral-8x22B-Instruct-v0.1
The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the [Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1).

## Run the model
```python
import torch
from transformers import AutoModelForCausalLM
from mistral_common.protocol.instruct.messages import (
    AssistantMessage,
    UserMessage,
)
from mistral_common.protocol.instruct.tool_calls import (
    Tool,
    Function,
)
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

device = "cuda"  # the device to load the model onto

tokenizer_v3 = MistralTokenizer.v3()

mistral_query = ChatCompletionRequest(
    tools=[
        Tool(
            function=Function(
                name="get_current_weather",
                description="Get the current weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "format": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the user's location.",
                        },
                    },
                    "required": ["location", "format"],
                },
            )
        )
    ],
    messages=[
        UserMessage(content="What's the weather like today in Paris"),
    ],
    model="test",
)

# encode_chat_completion returns token ids as a plain list, so build a batch tensor
tokens = tokenizer_v3.encode_chat_completion(mistral_query).tokens

model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
model.to(device)
model_inputs = torch.tensor([tokens], device=device)

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
sp_tokenizer = tokenizer_v3.instruct_tokenizer.tokenizer
decoded = sp_tokenizer.decode(generated_ids[0].tolist())
print(decoded)
```

# Instruct tokenizer
The Hugging Face tokenizer included in this release should match our own. To compare:
`pip install mistral-common`

```py
from mistral_common.protocol.instruct.messages import (
    AssistantMessage,
    UserMessage,
)
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

from transformers import AutoTokenizer

tokenizer_v3 = MistralTokenizer.v3()

mistral_query = ChatCompletionRequest(
    messages=[
        UserMessage(content="How many experts ?"),
        AssistantMessage(content="8"),
        UserMessage(content="How big ?"),
        AssistantMessage(content="22B"),
        UserMessage(content="Noice 🎉 !"),
    ],
    model="test",
)
hf_messages = mistral_query.model_dump()['messages']

tokenized_mistral = tokenizer_v3.encode_chat_completion(mistral_query).tokens

tokenizer_hf = AutoTokenizer.from_pretrained('mistralai/Mixtral-8x22B-Instruct-v0.1')
tokenized_hf = tokenizer_hf.apply_chat_template(hf_messages, tokenize=True)

assert tokenized_hf == tokenized_mistral
```

# Function calling and special tokens
This tokenizer includes additional special tokens related to function calling:
- [TOOL_CALLS]
- [AVAILABLE_TOOLS]
- [/AVAILABLE_TOOLS]
- [TOOL_RESULTS]
- [/TOOL_RESULTS]

If you want to use this model with function calling, please be sure to apply these tokens similarly to what is done in our [SentencePieceTokenizerV3](https://github.com/mistralai/mistral-common/blob/main/src/mistral_common/tokens/tokenizers/sentencepiece.py#L299); a quick sanity check is sketched below.
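
Not part of the original card: as a minimal sketch, one can verify that these control tokens are registered in the Hugging Face tokenizer's vocabulary (assuming each marker is stored as a single token, as the v3 vocabulary suggests):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
for marker in [
    "[TOOL_CALLS]",
    "[AVAILABLE_TOOLS]",
    "[/AVAILABLE_TOOLS]",
    "[TOOL_RESULTS]",
    "[/TOOL_RESULTS]",
]:
    # convert_tokens_to_ids falls back to the unk id for unknown tokens
    token_id = tok.convert_tokens_to_ids(marker)
    status = "missing" if token_id == tok.unk_token_id else "ok"
    print(f"{marker}: id={token_id} ({status})")
```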

# The Mistral AI Team
Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux,
Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault,
Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot,
Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger,
Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona,
Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon,
Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat,
Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen,
Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao,
Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang,
Valera Nemychnikova, William El Sayed, William Marshall
config.json ADDED
@@ -0,0 +1,29 @@
{
    "architectures": [
        "MixtralForCausalLM"
    ],
    "attention_dropout": 0.0,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "hidden_act": "silu",
    "hidden_size": 6144,
    "initializer_range": 0.02,
    "intermediate_size": 16384,
    "max_position_embeddings": 65536,
    "model_type": "mixtral",
    "num_attention_heads": 48,
    "num_experts_per_tok": 2,
    "num_hidden_layers": 56,
    "num_key_value_heads": 8,
    "num_local_experts": 8,
    "output_router_logits": false,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "router_aux_loss_coef": 0.001,
    "sliding_window": null,
    "tie_word_embeddings": false,
    "torch_dtype": "bfloat16",
    "transformers_version": "4.38.0",
    "use_cache": true,
    "vocab_size": 32768
}
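
For orientation (not part of the upload itself), the Mixture-of-Experts fields above can be read back with transformers' `AutoConfig`; a minimal sketch:

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")

# Routing: 8 local experts per layer, 2 of which are active per token
print(cfg.num_local_experts, cfg.num_experts_per_tok)  # 8 2
print(cfg.hidden_size, cfg.num_hidden_layers)          # 6144 56
print(cfg.max_position_embeddings)                     # 65536-token context
```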
generation_config.json ADDED
@@ -0,0 +1,6 @@
{
    "_from_model_config": true,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "transformers_version": "4.34.0.dev0"
}
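
As a side note (not in the commit), these generation defaults can be loaded on their own; a minimal sketch:

```python
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id)  # 1 2
```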
model-00058-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4e2de705aefc7b98a4394b9b691fd733d19633370ec8c3ded13f89fe73e11b5b
size 4806799144
model-00059-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:700482c2a697bd67ae38b25ddbd81babb83c77ebce91b5f61761409eb55e4ae0
size 1207997392
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
{
    "bos_token": "<s>",
    "eos_token": "</s>",
    "unk_token": "<unk>",
    "b_inst": "[INST]",
    "e_inst": "[/INST]"
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
{
    "add_bos_token": false,
    "add_eos_token": false,
    "added_tokens_decoder": {
        "0": {
            "content": "<unk>",
            "lstrip": false,
            "normalized": true,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "1": {
            "content": "<s>",
            "lstrip": false,
            "normalized": true,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "2": {
            "content": "</s>",
            "lstrip": false,
            "normalized": true,
            "rstrip": false,
            "single_word": false,
            "special": true
        }
    },
    "additional_special_tokens": [],
    "bos_token": "<s>",
    "chat_template": [
        {
            "name": "default",
            "template": "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
        },
        {
            "name": "tool_use",
            "template": "{{bos_token}}{% set user_messages = messages | selectattr('role', 'equalto', 'user') | list %}{% for message in messages %}{% if message['role'] == 'user' %}{% if message == user_messages[-1] %}{{ '[AVAILABLE_TOOLS]'}}{% for tool in tools %}{{ tool }}{% endfor %}{{ '[/AVAILABLE_TOOLS]'}}{{ '[INST]' + message['content'] + '[/INST]' }}{% else %}{{ '[INST]' + message['content'] + '[/INST]' }}{% endif %}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% elif message['role'] == 'tool_results' %}{{'[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]'}}{% elif message['role'] == 'tool_calls' %}{{'[TOOL_CALLS]' + message['content']|string + eos_token}}{% endif %}{% endfor %}"
        }
    ],
    "clean_up_tokenization_spaces": false,
    "eos_token": "</s>",
    "legacy": true,
    "model_max_length": 1000000000000000019884624838656,
    "pad_token": null,
    "sp_model_kwargs": {},
    "spaces_between_special_tokens": false,
    "tokenizer_class": "LlamaTokenizer",
    "unk_token": "<unk>",
    "use_default_system_prompt": false
}
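
For illustration only (not part of the commit): assuming a transformers version that understands the named-template list above, the "default" template can be rendered directly with `apply_chat_template`; a minimal sketch:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
messages = [
    {"role": "user", "content": "How many experts ?"},
    {"role": "assistant", "content": "8"},
    {"role": "user", "content": "How big ?"},
]
# The "default" template enforces strict user/assistant alternation and wraps
# each user turn in [INST] ... [/INST]; assistant turns are closed with </s>.
text = tok.apply_chat_template(messages, tokenize=False)
print(text)
```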