Update README.md
#1
by reach-vb (HF staff) - opened

Files changed (1)
  1. README.md (+7 -126)

README.md CHANGED
@@ -1,121 +1,15 @@
  ---
+ license: apache-2.0
  language:
  - fr
  - it
  - de
  - es
  - en
- license: apache-2.0
  tags:
  - moe
- model-index:
- - name: Mixtral-8x22B-v0.1
-   results:
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: AI2 Reasoning Challenge (25-Shot)
-       type: ai2_arc
-       config: ARC-Challenge
-       split: test
-       args:
-         num_few_shot: 25
-     metrics:
-     - type: acc_norm
-       value: 70.48
-       name: normalized accuracy
-     source:
-       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=mistral-community/Mixtral-8x22B-v0.1
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: HellaSwag (10-Shot)
-       type: hellaswag
-       split: validation
-       args:
-         num_few_shot: 10
-     metrics:
-     - type: acc_norm
-       value: 88.73
-       name: normalized accuracy
-     source:
-       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=mistral-community/Mixtral-8x22B-v0.1
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: MMLU (5-Shot)
-       type: cais/mmlu
-       config: all
-       split: test
-       args:
-         num_few_shot: 5
-     metrics:
-     - type: acc
-       value: 77.81
-       name: accuracy
-     source:
-       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=mistral-community/Mixtral-8x22B-v0.1
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: TruthfulQA (0-shot)
-       type: truthful_qa
-       config: multiple_choice
-       split: validation
-       args:
-         num_few_shot: 0
-     metrics:
-     - type: mc2
-       value: 51.08
-     source:
-       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=mistral-community/Mixtral-8x22B-v0.1
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: Winogrande (5-shot)
-       type: winogrande
-       config: winogrande_xl
-       split: validation
-       args:
-         num_few_shot: 5
-     metrics:
-     - type: acc
-       value: 84.53
-       name: accuracy
-     source:
-       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=mistral-community/Mixtral-8x22B-v0.1
-       name: Open LLM Leaderboard
-   - task:
-       type: text-generation
-       name: Text Generation
-     dataset:
-       name: GSM8k (5-shot)
-       type: gsm8k
-       config: main
-       split: test
-       args:
-         num_few_shot: 5
-     metrics:
-     - type: acc
-       value: 74.15
-       name: accuracy
-     source:
-       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=mistral-community/Mixtral-8x22B-v0.1
-       name: Open LLM Leaderboard
  ---
- # Mixtral-8x22B
-
- > [!TIP]
- > MistralAI has uploaded weights to their organization at [mistralai/Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1) and [mistralai/Mixtral-8x22B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1) too.
+ # Model Card for Mixtral-8x22B

  > [!TIP]
  > Kudos to [@v2ray](https://huggingface.co/v2ray) for converting the checkpoints and uploading them in `transformers` compatible format. Go give them a follow!
@@ -127,7 +21,7 @@ The Mixtral-8x22B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.
  ```python
  from transformers import AutoModelForCausalLM, AutoTokenizer

- model_id = "mistral-community/Mixtral-8x22B-v0.1"
+ model_id = "v2ray/Mixtral-8x22B-v0.1"
  tokenizer = AutoTokenizer.from_pretrained(model_id)

  model = AutoModelForCausalLM.from_pretrained(model_id)
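
The hunk above shows only the context around the changed `model_id`; the README's snippet continues with a tokenize/generate/decode step, visible as context in the later hunk headers. A minimal runnable sketch of the full pattern after this PR, with an illustrative prompt that is not taken from the diff:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "v2ray/Mixtral-8x22B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Full-precision weights for an 8x22B model need hundreds of GB of memory.
model = AutoModelForCausalLM.from_pretrained(model_id)

text = "Hello my name is"  # illustrative prompt, not part of the diff
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```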
@@ -148,7 +42,7 @@ Note `float16` precision only works on GPU devices
  + import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

- model_id = "mistral-community/Mixtral-8x22B-v0.1"
+ model_id = "v2ray/Mixtral-8x22B-v0.1"
  tokenizer = AutoTokenizer.from_pretrained(model_id)

  + model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(0)
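
Note the two kinds of markers here: the leading `-`/`+` on the `model_id` lines are this PR's change, while `+ import torch` and `+ model = ...` belong to the README itself, which presents half precision as a diff against the base snippet. Applied together, the loading step would look roughly like this sketch (assuming a CUDA GPU at index 0):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "v2ray/Mixtral-8x22B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# float16 halves memory versus float32 but, as the README notes,
# only works on GPU devices; .to(0) places the model on cuda:0.
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16
).to(0)
```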
@@ -169,7 +63,7 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  + import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

- model_id = "mistral-community/Mixtral-8x22B-v0.1"
+ model_id = "v2ray/Mixtral-8x22B-v0.1"
  tokenizer = AutoTokenizer.from_pretrained(model_id)

  + model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
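
`load_in_4bit=True` depends on the `bitsandbytes` package and a CUDA GPU. Later `transformers` releases express the same request through a `BitsAndBytesConfig`; a sketch of that equivalent, assuming `bitsandbytes` and `accelerate` are installed:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "v2ray/Mixtral-8x22B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Newer-style equivalent of load_in_4bit=True: weights are quantized
# to 4 bits as they load; device_map="auto" spreads them across GPUs.
quant_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=quant_config, device_map="auto"
)
```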
@@ -190,7 +84,7 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  + import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer

- model_id = "mistral-community/Mixtral-8x22B-v0.1"
+ model_id = "v2ray/Mixtral-8x22B-v0.1"
  tokenizer = AutoTokenizer.from_pretrained(model_id)

  + model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True)
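
`use_flash_attention_2=True` requires the separate `flash-attn` package and half-precision weights; newer `transformers` versions spell the same option `attn_implementation="flash_attention_2"`. A sketch combining it with `float16`, under those assumptions:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "v2ray/Mixtral-8x22B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Flash Attention 2 only supports fp16/bf16, hence the explicit dtype;
# attn_implementation is the newer spelling of use_flash_attention_2=True.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
).to(0)
```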
@@ -206,17 +100,4 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  ## Notice
  Mixtral-8x22B-v0.1 is a pretrained base model and therefore does not have any moderation mechanisms.
  # The Mistral AI Team
- Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux, Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault, Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot, Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona, Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon, Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat, Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen, Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao, Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang, Valera Nemychnikova, William El Sayed, William Marshall.
- # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
- Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_mistral-community__Mixtral-8x22B-v0.1)
-
- | Metric |Value|
- |---------------------------------|----:|
- |Avg. |74.46|
- |AI2 Reasoning Challenge (25-Shot)|70.48|
- |HellaSwag (10-Shot) |88.73|
- |MMLU (5-Shot) |77.81|
- |TruthfulQA (0-shot) |51.08|
- |Winogrande (5-shot) |84.53|
- |GSM8k (5-shot) |74.15|
-
+ Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux, Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault, Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot, Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona, Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon, Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat, Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen, Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao, Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang, Valera Nemychnikova, William El Sayed, William Marshall.
 