---
license: apache-2.0
language:
- fr
- it
- de
- es
- en
inference: false
---
# Model Card for Mixtral-Fusion-4x7B-Instruct-v0.1
This is an experimental model created by merging the experts of Mixtral-8x7B-Instruct-v0.1 pairwise, reducing the eight experts per layer to four.

# How we merged experts
We simply take the average of every two `experts` weight tensors, fusing experts 0 and 1, 2 and 3, and so on.
The same pairwise averaging is applied to the corresponding rows of each router's `gate.weight`, as sketched below.
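
The snippet below is a minimal sketch of this merge, assuming the standard Mixtral state-dict layout (`model.layers.{i}.block_sparse_moe.*`); the function name and default loop bounds are illustrative and this is not the actual conversion notebook.

~~~python
# Sketch: average expert pairs (0,1)->0, (2,3)->1, ... and the matching
# rows of each router's gate.weight. Assumes the standard Mixtral
# checkpoint key layout; names here are illustrative.
def fuse_experts_pairwise(state_dict, num_layers=32, num_experts=8):
    # carry over all non-MoE weights unchanged
    fused = {k: v for k, v in state_dict.items() if ".block_sparse_moe." not in k}
    for i in range(num_layers):
        prefix = f"model.layers.{i}.block_sparse_moe"
        for j in range(num_experts // 2):
            for proj in ("w1", "w2", "w3"):
                a = state_dict[f"{prefix}.experts.{2 * j}.{proj}.weight"]
                b = state_dict[f"{prefix}.experts.{2 * j + 1}.{proj}.weight"]
                fused[f"{prefix}.experts.{j}.{proj}.weight"] = (a + b) / 2
        # gate.weight holds one row of router logits per expert,
        # so average the row pairs the same way
        gate = state_dict[f"{prefix}.gate.weight"]  # [num_experts, hidden_size]
        fused[f"{prefix}.gate.weight"] = (gate[0::2] + gate[1::2]) / 2
    return fused
~~~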

# How To Convert
notebook

# Usage
~~~sh
pip install git+https://github.com/huggingface/transformers --upgrade
pip install torch accelerate bitsandbytes flash_attn
~~~

~~~python
from transformers import AutoTokenizer, MixtralForCausalLM
import torch

model_name_or_path = "mmnga/Mixtral-Fusion-4x7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = MixtralForCausalLM.from_pretrained(model_name_or_path, load_in_8bit=True, device_map="auto")

# Number of experts routed per token: 1 or 2 (the original Mixtral uses 2)
model.config.num_experts_per_tok = 1

# Build the chat prompt
messages = [
    {"role": "user", "content": "Tell me what's for dinner tonight."},
]

with torch.no_grad():
    token_ids = tokenizer.apply_chat_template(messages, return_tensors="pt")
    output_ids = model.generate(
        token_ids.to(model.device),
        temperature=0.5,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        max_new_tokens=128,
        repetition_penalty=1.5,
    )

# Decode only the newly generated tokens
output = tokenizer.decode(output_ids[0][token_ids.size(1):])
print(output)
~~~
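
Note: since each fused expert is the average of two original experts, routing a single expert per token (`num_experts_per_tok = 1`) is the cheaper option, while 2 mirrors the original Mixtral routing. As this is an experimental merge, it may be worth comparing outputs under both settings.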