GGUF
Generated from Trainer
axolotl
LoneStriker committed
Commit 52f5343 (1 parent: 6e1277f)

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -1,35 +1,5 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ dolphin-2.9.1-llama-3-8b-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
+ dolphin-2.9.1-llama-3-8b-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ dolphin-2.9.1-llama-3-8b-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+ dolphin-2.9.1-llama-3-8b-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+ dolphin-2.9.1-llama-3-8b-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text

README.md ADDED
@@ -0,0 +1,187 @@
+ ---
+ license: other
+ base_model: meta-llama/Meta-Llama-3-8B
+ tags:
+ - generated_from_trainer
+ - axolotl
+ model-index:
+ - name: out
+   results: []
+ datasets:
+ - cognitivecomputations/Dolphin-2.9
+ - teknium/OpenHermes-2.5
+ - m-a-p/CodeFeedback-Filtered-Instruction
+ - cognitivecomputations/dolphin-coder
+ - cognitivecomputations/samantha-data
+ - microsoft/orca-math-word-problems-200k
+ - Locutusque/function-calling-chatml
+ - internlm/Agent-FLAN
+ ---
+
+ # Dolphin 2.9.1 Llama 3 8b 🐬
+
+ Curated and trained by Eric Hartford, Lucas Atkins, Fernando Fernandes, and Cognitive Computations.
+
+ Discord: https://discord.gg/8fbBeC7ZGx
+
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png" width="600" />
+
+ We have retrained our Llama-3-8b fine-tune to address behavioral issues introduced by the initial 2.9 dataset. Specifically, SystemChat was making the model *too* reliant on the system prompt, and it occasionally caused the model to over-reference the system prompt. We also found that generation length was at times insufficient for the task at hand, and we identified UltraChat as the culprit. To address these concerns, we removed SystemChat and UltraChat from the dataset. It is otherwise identical to dolphin-2.9.
+
+ Our appreciation for the sponsors of Dolphin 2.9.1:
+ - [Crusoe Cloud](https://crusoe.ai/) - provided an excellent on-demand 8x L40S node
+
+ This model is based on Llama-3-8b and is governed by the [META LLAMA 3 COMMUNITY LICENSE AGREEMENT](LICENSE).
+
+ The base model has an 8k context window, and the full-weight fine-tuning was done with a 4k sequence length.
+
+ Training took 1.5 days on an 8x L40S node provided by Crusoe Cloud.
+
+ This model was trained with full-weight fine-tuning (FFT) on all parameters, using the ChatML prompt template format (see the usage sketch after this card).
+
+ Example:
+
+ ```
+ <|im_start|>system
+ You are Dolphin, a helpful AI assistant.<|im_end|>
+ <|im_start|>user
+ {prompt}<|im_end|>
+ <|im_start|>assistant
+
+ ```
+
+ Dolphin-2.9.1 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling.
+
+ Dolphin is uncensored. We have filtered the dataset to remove alignment and bias, which makes the model more compliant. You are advised to implement your own alignment layer before exposing the model as a service; it will be highly compliant with any requests, even unethical ones. Please read my blog post about uncensored models (https://erichartford.com/uncensored-models). You are responsible for any content you create using this model. Enjoy responsibly.
+
+ Dolphin is licensed according to Meta's Llama 3 license. We grant permission for any use, including commercial, that complies with Meta's Llama 3 license. Dolphin was trained on data generated by GPT-4, among other models.
+
+ ## Evals
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/0pqSc8jsJlhBH8dcgpwE7.png)
+
+ ## Training
+
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.0`
+ ```yaml
+ base_model: meta-llama/Meta-Llama-3-8B
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+ tokenizer_use_fast: false
+
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+ model_config:
+
+ datasets:
+ - path: /workspace/datasets/dolphin-2.9/dolphin201-sharegpt2.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/dolphin-coder-translate-sharegpt2.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/dolphin-coder-codegen-sharegpt2.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/m-a-p_Code-Feedback-sharegpt-unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt-unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/not_samantha_norefusals.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/Orca-Math-resort-unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/agent_instruct_react_unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/toolbench_instruct_j1s1_3k_unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/toolbench_negative_unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/toolbench_react_10p_unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/toolbench_tflan_cot_30p_unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+ - path: /workspace/datasets/dolphin-2.9/openhermes200k_unfiltered.jsonl
+   type: sharegpt
+   conversation: chatml
+
+ chat_template: chatml
+
+
+ dataset_prepared_path: /workspace/datasets/dolphin-2.9/thingy
+ val_set_size: 0.0002
+ output_dir: ./out
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 3
+ num_epochs: 3
+ logging_steps: 1
+ optimizer: adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 2e-5
+
+ wandb_project: dolphin-2.9-mixtral-8x22b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ gradient_checkpointing_kwargs:
+   use_reentrant: false
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+ saves_per_epoch: 4
+ save_total_limit: 2
+ save_steps:
+ evals_per_epoch: 4
+ eval_sample_packing: false
+ debug:
+ deepspeed: deepspeed_configs/zero3_bf16.json
+ weight_decay: 0.05
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   eos_token: "<|im_end|>"
+   pad_token: "<|end_of_text|>"
+ tokens:
+ - "<|im_start|>"
+ - "<|im_end|>"
+
+ ```
+
+ </details><br>
+
+ ### Framework versions
+
+ - Transformers 4.40.0
+ - Pytorch 2.2.2+cu121
+ - Datasets 2.18.0
+ - Tokenizers 0.19.1
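
For reference, here is a minimal sketch of applying the ChatML format above with the Hugging Face `transformers` chat-template API. It assumes the full-weight checkpoint is published as `cognitivecomputations/dolphin-2.9.1-llama-3-8b` and that its tokenizer ships the ChatML `chat_template` declared in the axolotl config; adjust the repo id and generation settings to your setup.

```python
# Minimal sketch (assumed repo id): render the ChatML prompt and generate.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "cognitivecomputations/dolphin-2.9.1-llama-3-8b"  # assumption, not stated in the card

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [
    {"role": "system", "content": "You are Dolphin, a helpful AI assistant."},
    {"role": "user", "content": "Write a haiku about dolphins."},
]

# apply_chat_template renders the <|im_start|>/<|im_end|> turns shown in the card,
# ending with the assistant header so the model continues from there.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

output = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```

Because the config sets `eos_token` to `<|im_end|>`, generation should stop at the end of the assistant turn without extra stop-string handling.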
dolphin-2.9.1-llama-3-8b-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b298425cf39c558a608554ad977c86129ced7d00d17ca46d69c37f6a6bbe0c41
+ size 4321966368
dolphin-2.9.1-llama-3-8b-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3d2ab9d4ca41be70c39f00ad3f8b0f890bd32999a6138336628332d23014045
+ size 4920745312
dolphin-2.9.1-llama-3-8b-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e4a1637ca10fde28829627b1adb9e7e5611bf8d7fbf5d3a0ea7e2e3ca62a642
+ size 5732999520
dolphin-2.9.1-llama-3-8b-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9e4fd16806d9af4d5cec83dc2153fbaa1b9d9c2e801e19805d110e9c553098a
+ size 6596019616
dolphin-2.9.1-llama-3-8b-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ec41846af26d9d6c2eb544e8047ff17c5bec82c3266b205ed0f7d91a6c16aac
+ size 8540788000
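
The five GGUF files above are llama.cpp quantizations of the same model, ranging from about 4.3 GB (Q3_K_L) to 8.5 GB (Q8_0); for example, the Q4_K_M file at 4,920,745,312 bytes works out to roughly 4.9 bits per weight for an ~8B-parameter Llama 3 model. Below is a minimal sketch of fetching and running one of them with `huggingface_hub` and `llama-cpp-python`; the repo id, quantization choice, and sampling settings are assumptions, not part of the upload.

```python
# Minimal sketch (assumed repo id): download one quantization and chat with it locally.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

gguf_path = hf_hub_download(
    repo_id="LoneStriker/dolphin-2.9.1-llama-3-8b-GGUF",  # assumption, not stated in the upload
    filename="dolphin-2.9.1-llama-3-8b-Q4_K_M.gguf",
)

llm = Llama(
    model_path=gguf_path,
    n_ctx=4096,            # matches the 4k fine-tuning sequence length
    n_gpu_layers=-1,       # offload all layers when a GPU build is available
    chat_format="chatml",  # the ChatML turns shown in the model card
)

out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are Dolphin, a helpful AI assistant."},
        {"role": "user", "content": "Explain GGUF quantization in two sentences."},
    ],
    max_tokens=256,
)
print(out["choices"][0]["message"]["content"])
```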