winglian committed
Commit df1359a
1 Parent(s): 64bee85

organize the axolotl.yml and adapters, set cache

README.md CHANGED
@@ -3,9 +3,11 @@ library_name: peft
 base_model: mistralai/Mixtral-8x7B-v0.1
 ---
 
-# Model Card for Model ID
+# SlimOrca Mixtral 8x7B
 
-<!-- Provide a quick summary of what the model is/does. -->
+[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+
+Official release of the SlimOrca Mixtral finetune. More details to come.
 
 
 
@@ -217,99 +219,4 @@ The following `bitsandbytes` quantization config was used during training:
 ### Framework versions
 
 
-- PEFT 0.6.0
-## Training procedure
-
-
-The following `bitsandbytes` quantization config was used during training:
-- quant_method: bitsandbytes
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: bfloat16
-
-### Framework versions
-
-
[... the 19 deleted lines above repeat four more times in the old README and are removed as well ...]
 - PEFT 0.6.0
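
For reference, the quantization settings recorded in the README describe a standard 4-bit NF4 QLoRA setup. A minimal loading sketch in `transformers` that mirrors those values (the loading code itself is not part of this repo and is only illustrative):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirror the quantization config recorded in the README:
# 4-bit weights, nf4 quant type, double quantization, bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x7B-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)
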
adapter_model.bin DELETED
File without changes
adapter_config.json → adapters/adapter_config.json RENAMED
File without changes
adapter_model.safetensors → adapters/adapter_model.safetensors RENAMED
File without changes
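
This commit moves the adapter weights and config into an adapters/ subfolder, so PEFT needs to be pointed at that subfolder when loading. A rough sketch, assuming the repo is published under the hub_model_id from configs/mixtral.yml and that PEFT (0.6.0, per the README) forwards the subfolder argument to the Hub download:

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "mistralai/Mixtral-8x7B-v0.1"
ADAPTER_REPO = "openaccess-ai-collective/slimorca-mixstral-8x7b"  # hub_model_id from configs/mixtral.yml

# Recreate the two ChatML tokens added during training (vocab_size 32002 in config.json).
tokenizer = AutoTokenizer.from_pretrained(BASE)
tokenizer.add_special_tokens({"eos_token": "<|im_end|>"})
tokenizer.add_tokens(["<|im_start|>"])

base = AutoModelForCausalLM.from_pretrained(BASE, device_map="auto")
base.resize_token_embeddings(len(tokenizer))  # embed_tokens / lm_head are saved alongside the adapter

# Adapter files now live under adapters/, hence the subfolder argument.
model = PeftModel.from_pretrained(base, ADAPTER_REPO, subfolder="adapters")
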
config.json CHANGED
@@ -37,6 +37,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.36.0.dev0",
-  "use_cache": false,
+  "use_cache": true,
   "vocab_size": 32002
 }
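
The only change to config.json flips use_cache back on. The flag is typically set to false during training because the KV cache is incompatible with gradient checkpointing (which configs/mixtral.yml enables), and set to true for inference so generation can reuse past key/values. A small sanity-check sketch (the repo id is an assumption taken from hub_model_id below):

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("openaccess-ai-collective/slimorca-mixstral-8x7b")
print(cfg.use_cache)   # True after this commit
print(cfg.vocab_size)  # 32002: two ChatML tokens on top of the base 32000
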
configs/mixtral.yml ADDED
@@ -0,0 +1,90 @@
+base_model: mistralai/Mixtral-8x7B-v0.1
+model_type: AutoModelForCausalLM
+tokenizer_type: LlamaTokenizer
+trust_remote_code: true
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: Open-Orca/SlimOrca
+    type: sharegpt
+    conversation: chatml
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.005
+output_dir: ./slimorca-mixtral-out
+save_total_limit: 2
+hub_model_id: openaccess-ai-collective/slimorca-mixstral-8x7b
+dataloader_num_workers: 8
+dataloader_prefetch_factor: 4
+dataloader_pin_memory: true
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 8192
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 64
+lora_alpha: 32
+lora_dropout: 0.1
+lora_target_linear: true
+lora_fan_in_fan_out:
+lora_modules_to_save:
+  - lm_head
+  - embed_tokens
+#lora_target_modules:
+#  - gate
+#  - q_proj
+#  - k_proj
+#  - v_proj
+#  - o_proj
+#  - w1
+#  - w2
+#  - w3
+
+wandb_project: slimorca-mixtral
+wandb_entity: oaaic
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 1
+micro_batch_size: 4
+num_epochs: 2
+optimizer: paged_adamw_8bit
+lr_scheduler: cosine
+learning_rate: 0.001
+adam_beta2: 0.95
+adam_epsilon: 0.00001
+max_grad_norm: 1.0
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: true
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 100
+eval_steps: 0.05
+save_steps: 0.25
+debug:
+deepspeed: deepspeed/zero2.json
+weight_decay: 0.1
+fsdp:
+fsdp_config:
+special_tokens:
+  eos_token: "<|im_end|>"
+tokens:
+  - "<|im_start|>"
+
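
The dataset is rendered as ChatML (conversation: chatml), with <|im_start|> added to the vocabulary and <|im_end|> used as the EOS token, so inference prompts should follow the same template. Training with this file would be launched through axolotl in the usual way, e.g. `accelerate launch -m axolotl.cli.train configs/mixtral.yml`. A small sketch of the implied prompt format (the system message is just a placeholder, not something shipped with the model):

def chatml_prompt(system: str, user: str) -> str:
    # ChatML turns are delimited by <|im_start|>{role} ... <|im_end|>,
    # matching the special tokens declared in configs/mixtral.yml.
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{user}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )

prompt = chatml_prompt(
    "You are a helpful assistant.",
    "Summarize what a mixture-of-experts layer does.",
)
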
training_args.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81cf5b750c5db7e8c430292756468a525455696a7015023e27c2a9bc77e7df78
-size 6011