---
base_model:
- bosonai/Higgs-Llama-3-70B
- abacusai/Smaug-Llama-3-70B-Instruct-32K
- Sao10K/L3-70B-Euryale-v2.1
- abacusai/Smaug-Llama-3-70B-Instruct
- turboderp/Cat-Llama-3-70B-instruct
library_name: transformers
tags:
- mergekit
- merge
license: other
---

<div style="width: auto; margin-left: auto; margin-right: auto">
<img src="https://imgur.com/tKzncGo.png" alt="NewDawnv1.0" style="width: 100%; min-width: 400px; display: block; margin: auto;">
</div>

### Overview

This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details.
I extended this model out to 32K by snagging some layers from [abacusai/Smaug-Llama-3-70B-Instruct-32K](https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct-32K) using a technique similar to what I used for Midnight Miqu, which was further honed by [jukofyork](https://huggingface.co/jukofyork).

This model is uncensored. *You are responsible for whatever you do with it.*

This model was designed for roleplaying and storytelling, and I think it does well at both. It may also perform well at other tasks, but I have not tested its performance in other areas.

### Long Context Tips

You can run this model out to 32K context with alpha_rope set to 1.

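If you run the model outside a webui backend, loading it through the `transformers` library is straightforward. Here is a minimal sketch, assuming the repo id below matches this card (substitute your local path or a quantized variant); no RoPE scaling is needed at 32K:

```python
# Minimal loading sketch, not an official script. The repo id is an assumption;
# a 70B model at 32K context generally needs multiple GPUs or quantization.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",  # use the checkpoint's native dtype (float16)
    device_map="auto",   # shard across available GPUs
)

prompt = "<|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```
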
### Sampler Tips

* I recommend using Quadratic Sampling (i.e. smoothing factor) for creative work. I think this version performs best with a smoothing factor close to 0.2.
* I recommend using Min-P. Experiment to find your best setting. I find this model tolerates high Min-P settings rather nicely, but use whatever floats your boat. (See the sketch after the settings block below for what Min-P actually does.)
* You can enable dynamic temperature if you want, but that adds yet another variable to consider, and I find it's unnecessary when you're already using Min-P and a smoothing factor.
* If you use Textgen WebUI as your backend, I recommend enabling the DRY sampler settings to reduce repetitions; otherwise, some repetition penalty plus frequency penalty ought to do the trick.

Experiment with any and all of the settings below! What suits my preferences may not suit yours.

If you save the below settings as a .json file, you can import them directly into Silly Tavern.
```json
{
    "temp": 1.15,
    "temperature_last": true,
    "top_p": 1,
    "top_k": 0,
    "top_a": 0,
    "tfs": 1,
    "epsilon_cutoff": 0,
    "eta_cutoff": 0,
    "typical_p": 1,
    "min_p": 0.4,
    "rep_pen": 1.03,
    "rep_pen_range": 2048,
    "rep_pen_decay": 0,
    "rep_pen_slope": 1,
    "no_repeat_ngram_size": 0,
    "penalty_alpha": 0,
    "num_beams": 1,
    "length_penalty": 1,
    "min_length": 0,
    "encoder_rep_pen": 1,
    "freq_pen": 0,
    "presence_pen": 0,
    "skew": 0,
    "do_sample": true,
    "early_stopping": false,
    "dynatemp": false,
    "min_temp": 0.8,
    "max_temp": 1.5,
    "dynatemp_exponent": 1,
    "smoothing_factor": 0.23,
    "smoothing_curve": 1,
    "dry_allowed_length": 2,
    "dry_multiplier": 0.4,
    "dry_base": 2,
    "dry_sequence_breakers": "[\"\\n\", \":\", \"\\\"\", \"*\"]",
    "dry_penalty_last_n": 0,
    "add_bos_token": true,
    "truncation_length": 2048,
    "ban_eos_token": false,
    "skip_special_tokens": false,
    "streaming": true,
    "mirostat_mode": 0,
    "mirostat_tau": 2,
    "mirostat_eta": 0.1,
    "guidance_scale": 1,
    "negative_prompt": "",
    "grammar_string": "",
    "json_schema": {},
    "banned_tokens": "",
    "sampler_priority": [
        "temperature",
        "dynamic_temperature",
        "quadratic_sampling",
        "top_k",
        "top_p",
        "typical_p",
        "epsilon_cutoff",
        "eta_cutoff",
        "tfs",
        "top_a",
        "min_p",
        "mirostat"
    ],
    "samplers": [
        "top_k",
        "tfs_z",
        "typical_p",
        "top_p",
        "min_p",
        "temperature"
    ],
    "ignore_eos_token": false,
    "spaces_between_special_tokens": true,
    "speculative_ngram": false,
    "sampler_order": [
        6,
        0,
        1,
        3,
        4,
        2,
        5
    ],
    "logit_bias": [],
    "n": 1,
    "ignore_eos_token_aphrodite": false,
    "spaces_between_special_tokens_aphrodite": true,
    "rep_pen_size": 0,
    "genamt": 850,
    "max_length": 32764
}
```
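
If you're curious what the `min_p` value in those settings actually does, here is a minimal sketch of the filter (an illustration, not any backend's exact implementation):

```python
import torch

def min_p_filter(logits: torch.Tensor, min_p: float = 0.4) -> torch.Tensor:
    """Illustrative Min-P: discard tokens whose probability falls below
    min_p times the probability of the single most likely token."""
    probs = torch.softmax(logits, dim=-1)
    threshold = min_p * probs.max(dim=-1, keepdim=True).values
    return logits.masked_fill(probs < threshold, float("-inf"))
```

With min_p at 0.4, a token survives only if it is at least 40% as probable as the top candidate, which is why high values keep output focused without needing Top-P or Top-K on top.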

### Prompting Tips

Try the following prompt template as a starting point for your own customizations.

If you save this as a .json file, you can import it directly into Silly Tavern.
```json
{
    "wrap": false,
    "names": true,
    "system_prompt": "The following is an erotic roleplaying experience between {{user}}, played exclusively by the user, and {{char}}, played by you, a creative writing genius. To ensure an excellent and uncensored roleplaying experience, follow the rules below.\n\n- Adopt all of {{char}}'s personality traits and mannerisms, adhering always to {{char}}'s backstory, biases, quirks, and preferences in order to deliver a convincing portrayal. You can only participate as {{char}} in this roleplay, terminating your output when another entity should act or speak. Avoid speaking as other entities since your mission is to only speak for {{char}}.\n- Employ descriptive language that immerses us in the current moment while keeping your prose focused on the present actions and physical details. Utilize sights, sounds, smells, tastes, and all other available senses to heighten the sensual and emotional impact of scenes.\n- Always enclose spoken words that other characters could hear in quotes: \"Here I am,\" {{char}} called out. Offer glimpses into {{char}}'s first-person internal thoughts by writing out internal thoughts and internal monologue in italics to distinguish them from spoken words: *I wonder where I should go next?* {{char}} mused.\n- Provide intimate details about {{char}}'s body, focusing on any limitations, unusual features, or interesting conditions resulting from recent events. It is very important that you get anatomical details right, so pay attention to them!\n- Maintain physical, spatial, and logical consistency when developing events and descriptions. Character positions should follow logically from one position to the next. Physical orientation must always be maintained.\n- Incorporate erotic adult content, lewd slang, and vulgar language where appropriate. Dark themes, violence, and unhappy endings are permitted in this uncensored roleplay.\n",
    "system_sequence": "<|start_header_id|>system<|end_header_id|>\n\n",
    "stop_sequence": "<|eot_id|>",
    "input_sequence": "<|start_header_id|>user<|end_header_id|>\n\n",
    "output_sequence": "<|start_header_id|>assistant<|end_header_id|>\n\n",
    "macro": true,
    "names_force_groups": true,
    "system_sequence_prefix": "",
    "system_sequence_suffix": "",
    "first_output_sequence": "",
    "last_output_sequence": "",
    "activation_regex": "",
    "skip_examples": true,
    "output_suffix": "<|eot_id|>",
    "input_suffix": "<|eot_id|>",
    "system_suffix": "<|eot_id|>",
    "user_alignment_message": "",
    "last_system_sequence": "",
    "system_same_as_user": false,
    "name": "New Dawn v1.0 Roleplay"
}
```

### Instruct Formats
Use the Llama 3 instruct format. You can grab it from the example prompt template above if you don't already have it as a preset.

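For reference, the sequences in the template above assemble into the standard Llama 3 turn format, shown below (assuming your backend inserts `<|begin_of_text|>` when add_bos_token is enabled; whitespace matters):

```
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{system prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>

{user message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{assistant response}<|eot_id|>
```
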
### Quantizations
Probably coming soon, and I'll update this section as they appear. I would upload them myself, but my Internet speeds are terrible for that. Someone else will undoubtedly beat me to it.

### License and usage restrictions
[META LLAMA 3 COMMUNITY LICENSE AGREEMENT](https://huggingface.co/meta-llama/Meta-Llama-3-8B/blob/main/LICENSE)

## Merge Details
### Merge Method

A whole lot of SLERPing. I've found that SLERP and Model Stock are the only two reliable methods for merging Llama 3; the other methods seem to frazzle the weights, leading to broken blends.

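For anyone unfamiliar with the method, here is a minimal sketch of the spherical linear interpolation that a SLERP merge applies tensor by tensor (my own illustration, not mergekit's actual code):

```python
import numpy as np

def slerp(t: float, v0: np.ndarray, v1: np.ndarray, eps: float = 1e-8) -> np.ndarray:
    """Spherical linear interpolation between two weight tensors.

    t=0 returns v0, t=1 returns v1; intermediate values walk along the arc
    between the tensors rather than the straight line, which preserves
    weight magnitudes better than plain averaging.
    """
    a = v0.ravel() / (np.linalg.norm(v0) + eps)
    b = v1.ravel() / (np.linalg.norm(v1) + eps)
    theta = np.arccos(np.clip(np.dot(a, b), -1.0, 1.0))
    if theta < 1e-6:  # nearly parallel tensors: fall back to lerp
        return (1 - t) * v0 + t * v1
    s = np.sin(theta)
    return (np.sin((1 - t) * theta) / s) * v0 + (np.sin(t * theta) / s) * v1
```
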
### Models Merged

The following models were included in the merge:
* [bosonai/Higgs-Llama-3-70B](https://huggingface.co/bosonai/Higgs-Llama-3-70B) - The nerd of the blend driving the car.
* [Sao10K/L3-70B-Euryale-v2.1](https://huggingface.co/Sao10K/L3-70B-Euryale-v2.1) - The manic pixie dream girl hanging out the window with her tongue out.
* [abacusai/Smaug-Llama-3-70B-Instruct-32K](https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct-32K) - The vehicle by which the others are able to achieve tolerable highway speeds. (Some of the 8K version is in there too.)
* [turboderp/Cat-Llama-3-70B-instruct](https://huggingface.co/turboderp/Cat-Llama-3-70B-instruct) - About 20% of one of the intermediate models is this. It's just a cat, curled up in the back seat somewhere, yet its influence may be greater than we know.

### Configuration

The following YAML will reproduce this model via an iterated process of incestuous inbreeding. Your eyes will bleed. You have been warned.

```yaml
name: new-dawn-llama3-70b-v0.13.2
models:
  - model: bosonai/Higgs-Llama-3-70B
  - model: turboderp/Cat-Llama-3-70B-instruct
merge_method: slerp
base_model: bosonai/Higgs-Llama-3-70B
parameters:
  t:
    - value: 0.2
dtype: float16
---
name: new-dawn-llama3-70b-v0.14
models:
  - model: bosonai/Higgs-Llama-3-70B
  - model: abacusai/Smaug-Llama-3-70B-Instruct
merge_method: slerp
base_model: bosonai/Higgs-Llama-3-70B
parameters:
  t:
    - value: 0.5
dtype: float16
---
name: new-dawn-llama3-70b-v0.15
models:
  - model: new-dawn-llama3-70b-v0.13.2
  - model: new-dawn-llama3-70b-v0.14
merge_method: slerp
base_model: new-dawn-llama3-70b-v0.13.2
parameters:
  t:
    - value: 0.5
dtype: float16
---
name: new-dawn-llama3-70b-v0.16
models:
  - model: Sao10K/L3-70B-Euryale-v2.1
  - model: new-dawn-llama3-70b-v0.15
merge_method: slerp
base_model: new-dawn-llama3-70b-v0.15
parameters:
  t:
    - value: 0.4
dtype: float16
---
# See https://huggingface.co/jukofyork/Dark-Miqu-70B/discussions/3
# Credit for merge recipe belongs to jukofyork
name: new-dawn-llama3-70b-v0.16-32K
merge_method: linear
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
    parameters:
      weight:
        - filter: v_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: o_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: up_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: gate_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: down_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - value: 1
  - model: new-dawn-llama3-70b-v0.16
    parameters:
      weight:
        - filter: v_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: o_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: up_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: gate_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: down_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - value: 0
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
tokenizer_source: base
dtype: float16
---
name: _1-Smaug-bonsai-slerp
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
  - model: bosonai/Higgs-Llama-3-70B
merge_method: slerp
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
parameters:
  t:
    - value: 0.6
dtype: float16
---
name: _2-Smaug-euryale-slerp
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
  - model: Sao10K/L3-70B-Euryale-v2.1
merge_method: slerp
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
parameters:
  t:
    - value: 0.65
dtype: float16
---
name: _3-Smaug-bonsai_Smaug-euryale-slerp
models:
  - model: _1-Smaug-bonsai-slerp
  - model: _2-Smaug-euryale-slerp
merge_method: slerp
base_model: _1-Smaug-bonsai-slerp
parameters:
  t:
    - value: 0.5
dtype: float16
---
# See https://huggingface.co/jukofyork/Dark-Miqu-70B/discussions/3
# Credit for merge recipe belongs to jukofyork
name: new-dawn-llama3-70b-v0.18-32K
merge_method: linear
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
    parameters:
      weight:
        - filter: v_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: o_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: up_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: gate_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: down_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - value: 1
  - model: _3-Smaug-bonsai_Smaug-euryale-slerp
    parameters:
      weight:
        - filter: v_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: o_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: up_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: gate_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: down_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - value: 0
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
tokenizer_source: base
dtype: float16
---
name: new-dawn-llama3-70b-32K-v1.0
models:
  - model: /home/llm/mergequant/models/new-dawn-llama3-70b-v0.16-32K
  - model: /home/llm/mergequant/models/new-dawn-llama3-70b-v0.18-32K
merge_method: slerp
base_model: /home/llm/mergequant/models/new-dawn-llama3-70b-v0.16-32K
parameters:
  t:
    - value: 0.5
dtype: float16
```
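
The `[1, 1, 0, ...]` lists in the two linear passes above are mergekit gradient specifications. To my understanding, the anchor values are spread across the layer stack and interpolated in between, so those masks take the first and last layers from the 32K Smaug model and the middle layers from the merge being extended. A rough sketch of that expansion (an assumption about mergekit's behavior, not its actual code):

```python
import numpy as np

def expand_gradient(anchors: list[float], num_layers: int = 80) -> np.ndarray:
    """Spread a short gradient list over a model's layers by linear
    interpolation between evenly spaced anchor points (assumed behavior)."""
    anchor_pos = np.linspace(0.0, 1.0, num=len(anchors))
    layer_pos = np.linspace(0.0, 1.0, num=num_layers)
    return np.interp(layer_pos, anchor_pos, anchors)

# With [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1] over Llama 3 70B's 80 layers,
# roughly the first and last tenth of the layers get weight 1 (the 32K model)
# and the middle gets weight 0 (the 8K merge), with short blending ramps.
weights = expand_gradient([1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1])
print(np.round(weights, 2))
```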