mmnga committed on
Commit
9c662bb
1 Parent(s): 273e426

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -24
README.md CHANGED
@@ -13,7 +13,8 @@ This model is an experimental model created by merging [mistralai/Mixtral-8x7B-I
13
 
14
  # How we merged experts
15
  We simply average the experts.weight tensors of every two experts.
16
- The same goes for gate.weight.
 
17
 
18
  # How To Convert
19
  use colab cpu-high-memory.
@@ -34,26 +35,11 @@ model_name_or_path = "mmnga/Mixtral-Fusion-4x7B-Instruct-v0.1"
34
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
35
  model = MixtralForCausalLM.from_pretrained(model_name_or_path, load_in_8bit=True)
36
 
37
- # set num_experts_per_tok 1 or 2 ?
38
- model.config.num_experts_per_tok = 2
39
-
40
- # message
41
- messages = [
42
- {"role": "user", "content": "Tell me what's for dinner tonight."},
43
- ]
44
-
45
- with torch.no_grad():
46
- token_ids = tokenizer.apply_chat_template(messages, return_tensors="pt")
47
- output_ids = model.generate(
48
- token_ids.to(model.device),
49
- temperature=0.5,
50
- do_sample=True,
51
- top_p=0.95,
52
- top_k=40,
53
- max_new_tokens=128,
54
- repetition_penalty=1.5
55
- )
56
- output = tokenizer.decode(output_ids[0][token_ids.size(1) :])
57
- print(output)
58
-
59
- ~~~
 
13
 
14
  # How we merged experts
15
  We simply average the experts.weight tensors of every two experts.
16
+ The same goes for gate.weight.
17
+ **Unfortunately, this model hallucinates heavily. See the extraction version instead: [mmnga/Mixtral-Extraction-4x7B-Instruct-v0.1](https://huggingface.co/mmnga/Mixtral-Extraction-4x7B-Instruct-v0.1)**
18
 
19
  # How To Convert
20
  use colab cpu-high-memory.
 
35
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
36
  model = MixtralForCausalLM.from_pretrained(model_name_or_path, load_in_8bit=True)
37
 
38
+ text = "Tell me what's for dinner tonight. "
39
+ inputs = tokenizer(text, return_tensors="pt")
40
+
41
+ outputs = model.generate(**inputs, max_new_tokens=128)
42
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
43
+
44
+ ~~~
45
+