DewEfresh committed on
Commit
a07e925
1 Parent(s): 4d20b61

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - m-a-p/neo_7b
4
+ - DewEfresh/neo_7b
5
+ tags:
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - m-a-p/neo_7b
10
+ - DewEfresh/neo_7b
11
+ ---
12
+
13
+ # Neo_7b-merge8
14
+
15
+ Neo_7b-merge8 is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
16
+ * [m-a-p/neo_7b](https://huggingface.co/m-a-p/neo_7b)
17
+ * [DewEfresh/neo_7b](https://huggingface.co/DewEfresh/neo_7b)
18
+
19
+ ## 🧩 Configuration
20
+
21
+ ```yaml
22
+ slices:
23
+ # Group 1 (layers 0-3 to 0-2)
24
+ - sources:
25
+ - model: m-a-p/neo_7b
26
+ layer_range: [0, 0]
27
+ - model: DewEfresh/neo_7b
28
+ layer_range: [3, 3]
29
+ - sources:
30
+ - model: m-a-p/neo_7b
31
+ layer_range: [1, 1]
32
+ - model: DewEfresh/neo_7b
33
+ layer_range: [3, 3]
34
+ - sources:
35
+ - model: m-a-p/neo_7b
36
+ layer_range: [2, 2]
37
+ - model: DewEfresh/neo_7b
38
+ layer_range: [3, 3]
39
+
40
+ # Group 2 (layers 4-7 to 3-5)
41
+ - sources:
42
+ - model: m-a-p/neo_7b
43
+ layer_range: [3, 3]
44
+ - model: DewEfresh/neo_7b
45
+ layer_range: [7, 7]
46
+ - sources:
47
+ - model: m-a-p/neo_7b
48
+ layer_range: [4, 4]
49
+ - model: DewEfresh/neo_7b
50
+ layer_range: [7, 7]
51
+ - sources:
52
+ - model: m-a-p/neo_7b
53
+ layer_range: [5, 5]
54
+ - model: DewEfresh/neo_7b
55
+ layer_range: [7, 7]
56
+
57
+ # Group 3 (layers 8-11 to 6-8)
58
+ - sources:
59
+ - model: m-a-p/neo_7b
60
+ layer_range: [6, 6]
61
+ - model: DewEfresh/neo_7b
62
+ layer_range: [11, 11]
63
+ - sources:
64
+ - model: m-a-p/neo_7b
65
+ layer_range: [7, 7]
66
+ - model: DewEfresh/neo_7b
67
+ layer_range: [11, 11]
68
+ - sources:
69
+ - model: m-a-p/neo_7b
70
+ layer_range: [8, 8]
71
+ - model: DewEfresh/neo_7b
72
+ layer_range: [11, 11]
73
+
74
+ # Group 4 (layers 12-15 to 9-11)
75
+ - sources:
76
+ - model: m-a-p/neo_7b
77
+ layer_range: [9, 9]
78
+ - model: DewEfresh/neo_7b
79
+ layer_range: [15, 15]
80
+ - sources:
81
+ - model: m-a-p/neo_7b
82
+ layer_range: [10, 10]
83
+ - model: DewEfresh/neo_7b
84
+ layer_range: [15, 15]
85
+ - sources:
86
+ - model: m-a-p/neo_7b
87
+ layer_range: [11, 11]
88
+ - model: DewEfresh/neo_7b
89
+ layer_range: [15, 15]
90
+
91
+ # Group 5 (layers 16-19 to 12-14)
92
+ - sources:
93
+ - model: m-a-p/neo_7b
94
+ layer_range: [12, 12]
95
+ - model: DewEfresh/neo_7b
96
+ layer_range: [19, 19]
97
+ - sources:
98
+ - model: m-a-p/neo_7b
99
+ layer_range: [13, 13]
100
+ - model: DewEfresh/neo_7b
101
+ layer_range: [19, 19]
102
+ - sources:
103
+ - model: m-a-p/neo_7b
104
+ layer_range: [14, 14]
105
+ - model: DewEfresh/neo_7b
106
+ layer_range: [19, 19]
107
+
108
+ # Group 6 (layers 20-23 to 15-17)
109
+ - sources:
110
+ - model: m-a-p/neo_7b
111
+ layer_range: [15, 15]
112
+ - model: DewEfresh/neo_7b
113
+ layer_range: [23, 23]
114
+ - sources:
115
+ - model: m-a-p/neo_7b
116
+ layer_range: [16, 16]
117
+ - model: DewEfresh/neo_7b
118
+ layer_range: [23, 23]
119
+ - sources:
120
+ - model: m-a-p/neo_7b
121
+ layer_range: [17, 17]
122
+ - model: DewEfresh/neo_7b
123
+ layer_range: [23, 23]
124
+
125
+ # Group 7 (layers 24-27 to 18-20)
126
+ - sources:
127
+ - model: m-a-p/neo_7b
128
+ layer_range: [18, 18]
129
+ - model: DewEfresh/neo_7b
130
+ layer_range: [27, 27]
131
+ - sources:
132
+ - model: m-a-p/neo_7b
133
+ layer_range: [19, 19]
134
+ - model: DewEfresh/neo_7b
135
+ layer_range: [27, 27]
136
+ - sources:
137
+ - model: m-a-p/neo_7b
138
+ layer_range: [20, 20]
139
+ - model: DewEfresh/neo_7b
140
+ layer_range: [27, 27]
141
+
142
+ merge_method: slerp
143
+ base_model: m-a-p/neo_7b
144
+ parameters:
145
+ t:
146
+ - 0.75 # Weight for m-a-p/neo_7b layer
147
+ - 0.25 # Weight for the 4th DewEfresh/neo_7b layer being merged
148
+ dtype: bfloat16
149
+ output_path: ./merged_reduced_map_dewefresh_neo_7b
150
+ model_config:
151
+ architectures: ["LlamaForCausalLM"]
152
+ attention_bias: false
153
+ attention_dropout: 0.0
154
+ hidden_act: "silu"
155
+ hidden_size: 3072
156
+ intermediate_size: 24576
157
+ max_position_embeddings: 8192
158
+ model_type: "llama"
159
+ num_attention_heads: 16
160
+ num_hidden_layers: 21 # Reduced from 28 to 21
161
+ num_key_value_heads: 16
162
+ rms_norm_eps: 1e-05
163
+ rope_theta: 10000.0
164
+ use_cache: true
165
+ vocab_size: 64256
166
+ ```
167
+
168
+ ## 💻 Usage
169
+
170
+ ```python
171
+ !pip install -qU transformers accelerate
172
+
173
+ from transformers import AutoTokenizer
174
+ import transformers
175
+ import torch
176
+
177
+ model = "DewEfresh/Neo_7b-merge8"
178
+ messages = [{"role": "user", "content": "What is a large language model?"}]
179
+
180
+ tokenizer = AutoTokenizer.from_pretrained(model)
181
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
182
+ pipeline = transformers.pipeline(
183
+ "text-generation",
184
+ model=model,
185
+ torch_dtype=torch.float16,
186
+ device_map="auto",
187
+ )
188
+
189
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
190
+ print(outputs[0]["generated_text"])
191
+ ```
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|CLS|>": 64000,
3
+ "<|EOD|>": 64002,
4
+ "<|MASK|>": 64003,
5
+ "<|PAD|>": 64004,
6
+ "<|SEP|>": 64001
7
+ }
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "m-a-p/neo_7b",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 3072,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 24576,
14
+ "max_position_embeddings": 8192,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 0,
19
+ "num_key_value_heads": 16,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 10000.0,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.42.3",
27
+ "use_cache": true,
28
+ "vocab_size": 64256
29
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ slices:
3
+ # Group 1 (layers 0-3 to 0-2)
4
+ - sources:
5
+ - model: m-a-p/neo_7b
6
+ layer_range: [0, 0]
7
+ - model: DewEfresh/neo_7b
8
+ layer_range: [3, 3]
9
+ - sources:
10
+ - model: m-a-p/neo_7b
11
+ layer_range: [1, 1]
12
+ - model: DewEfresh/neo_7b
13
+ layer_range: [3, 3]
14
+ - sources:
15
+ - model: m-a-p/neo_7b
16
+ layer_range: [2, 2]
17
+ - model: DewEfresh/neo_7b
18
+ layer_range: [3, 3]
19
+
20
+ # Group 2 (layers 4-7 to 3-5)
21
+ - sources:
22
+ - model: m-a-p/neo_7b
23
+ layer_range: [3, 3]
24
+ - model: DewEfresh/neo_7b
25
+ layer_range: [7, 7]
26
+ - sources:
27
+ - model: m-a-p/neo_7b
28
+ layer_range: [4, 4]
29
+ - model: DewEfresh/neo_7b
30
+ layer_range: [7, 7]
31
+ - sources:
32
+ - model: m-a-p/neo_7b
33
+ layer_range: [5, 5]
34
+ - model: DewEfresh/neo_7b
35
+ layer_range: [7, 7]
36
+
37
+ # Group 3 (layers 8-11 to 6-8)
38
+ - sources:
39
+ - model: m-a-p/neo_7b
40
+ layer_range: [6, 6]
41
+ - model: DewEfresh/neo_7b
42
+ layer_range: [11, 11]
43
+ - sources:
44
+ - model: m-a-p/neo_7b
45
+ layer_range: [7, 7]
46
+ - model: DewEfresh/neo_7b
47
+ layer_range: [11, 11]
48
+ - sources:
49
+ - model: m-a-p/neo_7b
50
+ layer_range: [8, 8]
51
+ - model: DewEfresh/neo_7b
52
+ layer_range: [11, 11]
53
+
54
+ # Group 4 (layers 12-15 to 9-11)
55
+ - sources:
56
+ - model: m-a-p/neo_7b
57
+ layer_range: [9, 9]
58
+ - model: DewEfresh/neo_7b
59
+ layer_range: [15, 15]
60
+ - sources:
61
+ - model: m-a-p/neo_7b
62
+ layer_range: [10, 10]
63
+ - model: DewEfresh/neo_7b
64
+ layer_range: [15, 15]
65
+ - sources:
66
+ - model: m-a-p/neo_7b
67
+ layer_range: [11, 11]
68
+ - model: DewEfresh/neo_7b
69
+ layer_range: [15, 15]
70
+
71
+ # Group 5 (layers 16-19 to 12-14)
72
+ - sources:
73
+ - model: m-a-p/neo_7b
74
+ layer_range: [12, 12]
75
+ - model: DewEfresh/neo_7b
76
+ layer_range: [19, 19]
77
+ - sources:
78
+ - model: m-a-p/neo_7b
79
+ layer_range: [13, 13]
80
+ - model: DewEfresh/neo_7b
81
+ layer_range: [19, 19]
82
+ - sources:
83
+ - model: m-a-p/neo_7b
84
+ layer_range: [14, 14]
85
+ - model: DewEfresh/neo_7b
86
+ layer_range: [19, 19]
87
+
88
+ # Group 6 (layers 20-23 to 15-17)
89
+ - sources:
90
+ - model: m-a-p/neo_7b
91
+ layer_range: [15, 15]
92
+ - model: DewEfresh/neo_7b
93
+ layer_range: [23, 23]
94
+ - sources:
95
+ - model: m-a-p/neo_7b
96
+ layer_range: [16, 16]
97
+ - model: DewEfresh/neo_7b
98
+ layer_range: [23, 23]
99
+ - sources:
100
+ - model: m-a-p/neo_7b
101
+ layer_range: [17, 17]
102
+ - model: DewEfresh/neo_7b
103
+ layer_range: [23, 23]
104
+
105
+ # Group 7 (layers 24-27 to 18-20)
106
+ - sources:
107
+ - model: m-a-p/neo_7b
108
+ layer_range: [18, 18]
109
+ - model: DewEfresh/neo_7b
110
+ layer_range: [27, 27]
111
+ - sources:
112
+ - model: m-a-p/neo_7b
113
+ layer_range: [19, 19]
114
+ - model: DewEfresh/neo_7b
115
+ layer_range: [27, 27]
116
+ - sources:
117
+ - model: m-a-p/neo_7b
118
+ layer_range: [20, 20]
119
+ - model: DewEfresh/neo_7b
120
+ layer_range: [27, 27]
121
+
122
+ merge_method: slerp
123
+ base_model: m-a-p/neo_7b
124
+ parameters:
125
+ t:
126
+ - 0.75 # Weight for m-a-p/neo_7b layer
127
+ - 0.25 # Weight for the 4th DewEfresh/neo_7b layer being merged
128
+ dtype: bfloat16
129
+ output_path: ./merged_reduced_map_dewefresh_neo_7b
130
+ model_config:
131
+ architectures: ["LlamaForCausalLM"]
132
+ attention_bias: false
133
+ attention_dropout: 0.0
134
+ hidden_act: "silu"
135
+ hidden_size: 3072
136
+ intermediate_size: 24576
137
+ max_position_embeddings: 8192
138
+ model_type: "llama"
139
+ num_attention_heads: 16
140
+ num_hidden_layers: 21 # Reduced from 28 to 21
141
+ num_key_value_heads: 16
142
+ rms_norm_eps: 1e-05
143
+ rope_theta: 10000.0
144
+ use_cache: true
145
+ vocab_size: 64256
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41209159383a6438fa1e73146df470aae582c40bf510ec2495a3f8780477ce87
3
+ size 789584192
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f2a7f3db85521671732789bb8fd1bb3ae3a7e0d33170ea530f53a25346fcdd
3
+ size 4998668592
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1227a57ac0fc74f5a2c223e5e41dd53ba09f50acbf6d63e5f166a8e1fc740c5e
3
+ size 4926336584
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842f7380860bf7fcbc76d2380d90cadd65d691872aea7b707c770947a2456da6
3
+ size 4907455800
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c646eb68fbc0f40d473ef6eb80dd23fcb436002f2f32848ec9e4c3e46ad8e216
3
+ size 755001136
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4.4", "total_size": 789583872}, "weight_map": {"lm_head.weight": "model-00001-of-00001.safetensors", "model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|CLS|>",
4
+ "<|SEP|>",
5
+ "<|EOD|>",
6
+ "<|MASK|>",
7
+ "<|PAD|>"
8
+ ],
9
+ "bos_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": true
22
+ },
23
+ "pad_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": true
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": true
36
+ }
37
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6a2447b0e5664cabb2481587597102d82f42f0ccb7ef22e1c2d95494a8b03c5
3
+ size 1002561
tokenizer_config.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": true,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": true,
27
+ "special": true
28
+ },
29
+ "64000": {
30
+ "content": "<|CLS|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "64001": {
38
+ "content": "<|SEP|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "64002": {
46
+ "content": "<|EOD|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "64003": {
54
+ "content": "<|MASK|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "64004": {
62
+ "content": "<|PAD|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ }
69
+ },
70
+ "additional_special_tokens": [
71
+ "<|CLS|>",
72
+ "<|SEP|>",
73
+ "<|EOD|>",
74
+ "<|MASK|>",
75
+ "<|PAD|>"
76
+ ],
77
+ "auto_map": {
78
+ "AutoTokenizer": [
79
+ "m-a-p/neo_7b--tokenization_neo.NEOTokenizer",
80
+ null
81
+ ]
82
+ },
83
+ "bos_token": "<s>",
84
+ "chat_template": "{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
85
+ "clean_up_tokenization_spaces": false,
86
+ "eos_token": "</s>",
87
+ "model_max_length": 4096,
88
+ "pad_token": "<unk>",
89
+ "padding_side": "right",
90
+ "sp_model_kwargs": {},
91
+ "split_special_tokens": false,
92
+ "tokenizer_class": "NEOTokenizer",
93
+ "unk_token": "<unk>",
94
+ "use_fast": false
95
+ }