GGUF
Generated from Trainer
axolotl
LoneStriker commited on
Commit
5c39929
1 Parent(s): 9fc8d0b

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,5 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ dolphin-2.9.1-yi-1.5-34b-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
2
+ dolphin-2.9.1-yi-1.5-34b-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
3
+ dolphin-2.9.1-yi-1.5-34b-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
4
+ dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
5
+ dolphin-2.9.1-yi-1.5-34b-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: 01-ai/Yi-1.5-34B
4
+ tags:
5
+ - generated_from_trainer
6
+ - axolotl
7
+ datasets:
8
+ - cognitivecomputations/Dolphin-2.9
9
+ - teknium/OpenHermes-2.5
10
+ - m-a-p/CodeFeedback-Filtered-Instruction
11
+ - cognitivecomputations/dolphin-coder
12
+ - cognitivecomputations/samantha-data
13
+ - microsoft/orca-math-word-problems-200k
14
+ - Locutusque/function-calling-chatml
15
+ - internlm/Agent-FLAN
16
+ ---
17
+
18
+ # Dolphin 2.9.1 Yi 1.5 34b 🐬
19
+
20
+ Curated and trained by Eric Hartford, Lucas Atkins, Fernando Fernandes, and Cognitive Computations
21
+
22
+ This is our most spectacular outcome ever. FFT, all parameters, 16bit. 77.4 MMLU on 34b. And it talks like a dream.
23
+
24
+ Although the maximum positional-embedding length is 4k, we used a rope theta of 1000000.0 and trained with a sequence length of 8k. We plan to train on the upcoming 32k version as well.
25
+
26
+ Discord: https://discord.gg/8fbBeC7ZGx
27
+
28
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png" width="600" />
29
+
30
+ Our appreciation for the sponsors of Dolphin 2.9.1:
31
+ - [Crusoe Cloud](https://crusoe.ai/) - provided excellent on-demand 8xH100 node
32
+ - [OnDemand](https://on-demand.io/) - provided inference sponsorship
33
+
34
+ This model is based on Yi-1.5-34b and is governed by the Apache 2.0 license.
35
+
36
+ The base model has 4k context, but we used rope theta of 1000000.0 and the full-weight fine-tuning was with 8k sequence length.
37
+
38
+ Dolphin 2.9.1 uses ChatML prompt template format.
39
+
40
+ example:
41
+
42
+ ```
43
+ <|im_start|>system
44
+ You are Dolphin, a helpful AI assistant.<|im_end|>
45
+ <|im_start|>user
46
+ {prompt}<|im_end|>
47
+ <|im_start|>assistant
48
+
49
+ ```
50
+
51
+ Dolphin-2.9.1 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling.
52
+
53
+ Dolphin is uncensored. We have filtered the dataset to remove alignment and bias. This makes the model more compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. Please read my blog post about uncensored models. https://erichartford.com/uncensored-models You are responsible for any content you create using this model. Enjoy responsibly.
54
+
55
+ Dolphin is licensed under the Apache 2.0 license. We grant permission for any use, including commercial. Dolphin was trained on data generated from GPT4, among other models.
56
+
57
+ ## Evals
58
+
59
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/coI4WEJEJD4lhSWgMOjIr.png)
60
+
61
+ ## Training
62
+
63
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
64
+ <details><summary>See axolotl config</summary>
65
+
66
+ axolotl version: `0.4.0`
67
+ ```yaml
68
+ base_model: 01-ai/Yi-1.5-34B
69
+ model_type: LlamaForCausalLM
70
+ tokenizer_type: LlamaTokenizer
71
+ trust_remote_code: true
72
+
73
+ # load_in_8bit: false
74
+ # load_in_4bit: true
75
+ # strict: false
76
+
77
+ # adapter: qlora
78
+ # lora_modules_to_save: [embed_tokens, lm_head]
79
+
80
+ # lora_r: 32
81
+ # lora_alpha: 16
82
+ # lora_dropout: 0.05
83
+ # lora_target_linear: True
84
+ # lora_fan_in_fan_out:
85
+
86
+ datasets:
87
+ - path: /workspace/datasets/dolphin-2.9/dolphin201-sharegpt2.jsonl
88
+ type: sharegpt
89
+ conversation: chatml
90
+ - path: /workspace/datasets/dolphin-2.9/dolphin-coder-translate-sharegpt2.jsonl
91
+ type: sharegpt
92
+ conversation: chatml
93
+ - path: /workspace/datasets/dolphin-2.9/dolphin-coder-codegen-sharegpt2.jsonl
94
+ type: sharegpt
95
+ conversation: chatml
96
+ - path: /workspace/datasets/dolphin-2.9/m-a-p_Code-Feedback-sharegpt-unfiltered.jsonl
97
+ type: sharegpt
98
+ conversation: chatml
99
+ - path: /workspace/datasets/dolphin-2.9/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt-unfiltered.jsonl
100
+ type: sharegpt
101
+ conversation: chatml
102
+ - path: /workspace/datasets/dolphin-2.9/not_samantha_norefusals.jsonl
103
+ type: sharegpt
104
+ conversation: chatml
105
+ - path: /workspace/datasets/dolphin-2.9/Orca-Math-resort-unfiltered.jsonl
106
+ type: sharegpt
107
+ conversation: chatml
108
+ - path: /workspace/datasets/dolphin-2.9/agent_instruct_react_unfiltered.jsonl
109
+ type: sharegpt
110
+ conversation: chatml
111
+ - path: /workspace/datasets/dolphin-2.9/toolbench_instruct_j1s1_3k_unfiltered.jsonl
112
+ type: sharegpt
113
+ conversation: chatml
114
+ - path: /workspace/datasets/dolphin-2.9/toolbench_negative_unfiltered.jsonl
115
+ type: sharegpt
116
+ conversation: chatml
117
+ - path: /workspace/datasets/dolphin-2.9/toolbench_react_10p_unfiltered.jsonl
118
+ type: sharegpt
119
+ conversation: chatml
120
+ - path: /workspace/datasets/dolphin-2.9/toolbench_tflan_cot_30p_unfiltered.jsonl
121
+ type: sharegpt
122
+ conversation: chatml
123
+ - path: /workspace/datasets/dolphin-2.9/openhermes200k_unfiltered.jsonl
124
+ type: sharegpt
125
+ conversation: chatml
126
+
127
+ chat_template: chatml
128
+
129
+ dataset_prepared_path: yi34b
130
+ val_set_size: 0.01
131
+ output_dir: ./out-yi
132
+
133
+ sequence_len: 8192
134
+ sample_packing: true
135
+ pad_to_sequence_len: true
136
+
137
+ wandb_project: dolphin-2.9-yi-34b
138
+ wandb_watch:
139
+ wandb_run_id:
140
+ wandb_log_model:
141
+
142
+ gradient_accumulation_steps: 8
143
+ micro_batch_size: 1
144
+ num_epochs: 3
145
+ optimizer: adamw_8bit
146
+ lr_scheduler: cosine
147
+ learning_rate: 1e-5
148
+
149
+ train_on_inputs: false
150
+ group_by_length: false
151
+ bf16: auto
152
+ fp16:
153
+ tf32: true
154
+
155
+ gradient_checkpointing: true
156
+ gradient_checkpointing_kwargs:
157
+ use_reentrant: false
158
+ early_stopping_patience:
159
+ # resume_from_checkpoint: /workspace/axolotl/dbrx-checkpoint
160
+ logging_steps: 1
161
+ xformers_attention:
162
+ flash_attention: true
163
+
164
+ warmup_steps: 10
165
+ evals_per_epoch: 4
166
+ eval_table_size:
167
+ saves_per_epoch: 4
168
+ save_total_limit: 2
169
+ save_steps:
170
+ debug:
171
+ deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16.json
172
+ weight_decay: 0.05
173
+ fsdp:
174
+ fsdp_config:
175
+ special_tokens:
176
+ bos_token: "<|startoftext|>"
177
+ eos_token: "<|im_end|>"
178
+ pad_token: "<unk>"
179
+ unk_token: "<unk>"
180
+ tokens:
181
+ - "<|im_start|>"
182
+
183
+
184
+ ```
185
+
186
+ </details><br>
187
+
188
+ # out-yi
189
+
190
+ This model is a fine-tuned version of [01-ai/Yi-1.5-34B](https://huggingface.co/01-ai/Yi-1.5-34B) on the datasets listed above.
191
+ It achieves the following results on the evaluation set:
192
+ - Loss: 0.4425
193
+
194
+ ## Model description
195
+
196
+ More information needed
197
+
198
+ ## Intended uses & limitations
199
+
200
+ More information needed
201
+
202
+ ## Training and evaluation data
203
+
204
+ More information needed
205
+
206
+ ## Training procedure
207
+
208
+ ### Training hyperparameters
209
+
210
+ The following hyperparameters were used during training:
211
+ - learning_rate: 1e-05
212
+ - train_batch_size: 1
213
+ - eval_batch_size: 1
214
+ - seed: 42
215
+ - distributed_type: multi-GPU
216
+ - num_devices: 8
217
+ - gradient_accumulation_steps: 8
218
+ - total_train_batch_size: 64
219
+ - total_eval_batch_size: 8
220
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
221
+ - lr_scheduler_type: cosine
222
+ - lr_scheduler_warmup_steps: 10
223
+ - num_epochs: 3
224
+
225
+ ### Training results
226
+
227
+ | Training Loss | Epoch | Step | Validation Loss |
228
+ |:-------------:|:-----:|:----:|:---------------:|
229
+ | 0.6265 | 0.0 | 1 | 0.6035 |
230
+ | 0.4674 | 0.25 | 327 | 0.4344 |
231
+ | 0.4337 | 0.5 | 654 | 0.4250 |
232
+ | 0.4346 | 0.75 | 981 | 0.4179 |
233
+ | 0.3985 | 1.0 | 1308 | 0.4118 |
234
+ | 0.3128 | 1.23 | 1635 | 0.4201 |
235
+ | 0.3261 | 1.48 | 1962 | 0.4157 |
236
+ | 0.3259 | 1.73 | 2289 | 0.4122 |
237
+ | 0.3126 | 1.98 | 2616 | 0.4079 |
238
+ | 0.2265 | 2.21 | 2943 | 0.4441 |
239
+ | 0.2297 | 2.46 | 3270 | 0.4427 |
240
+ | 0.2424 | 2.71 | 3597 | 0.4425 |
241
+
242
+
243
+ ### Framework versions
244
+
245
+ - Transformers 4.40.0.dev0
246
+ - Pytorch 2.2.2+cu121
247
+ - Datasets 2.15.0
248
+ - Tokenizers 0.15.0
dolphin-2.9.1-yi-1.5-34b-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad7ebad14d69da20e615f57c6053c1f0856c5b0451da1f16a67840187a08bd4
3
+ size 18139445728
dolphin-2.9.1-yi-1.5-34b-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c0277a121e01d1df6f383586df1a1b926e2a0bc6cc5e72b9c54cb9eb4e0b2b
3
+ size 20658711008
dolphin-2.9.1-yi-1.5-34b-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f3f5174a5a48352dff6347a578470fa6b3272fc2550787d0aacec391779b1c
3
+ size 24321845728
dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc68bf3b4b012a7ea090bc05cd0bb759a1b974d83190fce4d338bf22b93e3f1b
3
+ size 28213926368
dolphin-2.9.1-yi-1.5-34b-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8aa2193c61d420d23abcebaa46cf11aaebdbf18bd6af5b801f61dbac2d568a
3
+ size 36542282208