RDson committed on
Commit
db12daf
1 Parent(s): 26cd411

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +91 -0
README.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - moe
4
+ - llama
5
+ - '3'
6
+ - llama 3
7
+ - 2x8b
8
+ ---
9
+ <img src="https://i.imgur.com/c1Mv8cy.png" width="640"/>
10
+
11
+ # Llama-3-Teal-Instruct-2x8B-MoE
12
+ This is an experimental Mixture-of-Experts (MoE) model created from [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [nvidia/Llama3-ChatQA-1.5-8B](https://huggingface.co/nvidia/Llama3-ChatQA-1.5-8B), [Salesforce/SFR-Iterative-DPO-LLaMA-3-8B-R](https://huggingface.co/Salesforce/SFR-Iterative-DPO-LLaMA-3-8B-R), and [Muhammad2003/Llama3-8B-OpenHermes-DPO](https://huggingface.co/Muhammad2003/Llama3-8B-OpenHermes-DPO) using Mergekit.
13
+
14
+ Mergekit YAML configuration:
15
+ ```
16
+ base_model: Meta-Llama-3-8B-Instruct
17
+ experts:
18
+ - source_model: Meta-Llama-3-8B-Instruct
19
+ positive_prompts:
20
+ - "explain"
21
+ - "chat"
22
+ - "assistant"
23
+ - "think"
24
+ - "roleplay"
25
+ - "versatile"
26
+ - "helpful"
27
+ - "factual"
28
+ - "integrated"
29
+ - "adaptive"
30
+ - "comprehensive"
31
+ - "balanced"
32
+ negative_prompts:
33
+ - "specialized"
34
+ - "narrow"
35
+ - "focused"
36
+ - "limited"
37
+ - "specific"
38
+ - source_model: ChatQA-1.5-8B
39
+ positive_prompts:
40
+ - "python"
41
+ - "math"
42
+ - "solve"
43
+ - "code"
44
+ - "programming"
45
+ negative_prompts:
46
+ - "sorry"
47
+ - "cannot"
48
+ - "factual"
49
+ - "concise"
50
+ - "straightforward"
51
+ - "objective"
52
+ - "dry"
53
+ - source_model: SFR-Iterative-DPO-LLaMA-3-8B-R
54
+ positive_prompts:
55
+ - "chat"
56
+ - "assistant"
57
+ - "AI"
58
+ - "instructive"
59
+ - "clear"
60
+ - "directive"
61
+ - "helpful"
62
+ - "informative"
63
+ - source_model: Llama3-8B-OpenHermes-DPO
64
+ positive_prompts:
65
+ - "analytical"
66
+ - "accurate"
67
+ - "logical"
68
+ - "knowledgeable"
69
+ - "precise"
70
+ - "calculate"
71
+ - "compute"
72
+ - "solve"
73
+ - "work"
74
+ - "python"
75
+ - "code"
76
+ - "javascript"
77
+ - "programming"
78
+ - "algorithm"
79
+ - "tell me"
80
+ - "assistant"
81
+ negative_prompts:
82
+ - "creative"
83
+ - "abstract"
84
+ - "imaginative"
85
+ - "artistic"
86
+ - "emotional"
87
+ - "mistake"
88
+ - "inaccurate"
89
+ gate_mode: hidden
90
+ dtype: float16
91
+ ```