Undi95 committed
Commit 6b8c802 · Parent: ef05db7

Upload Noromaid-8x7B.yml

Files changed (1)
  1. Noromaid-8x7B.yml +96 -0
Noromaid-8x7B.yml ADDED
@@ -0,0 +1,96 @@
+ base_model: mistralai/Mixtral-8x7B-v0.1
+ model_type: MixtralForCausalLM
+ tokenizer_type: LlamaTokenizer
+ is_mistral_derived_model: false
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: /workspace/datasets/aesir_modified.json
+     type: completion
+   - path: /workspace/datasets/aesir2_modified.json
+     type: completion
+   - path: /workspace/datasets/LimaRP-NoToken-NoLength.jsonl
+     type: completion
+   - path: /workspace/datasets/norobots.jsonl
+     type: sharegpt
+     conversation: alpaca
+   - path: /workspace/datasets/toxicsharegpt-NoWarning.jsonl
+     type: sharegpt
+     conversation: alpaca
+
+ dataset_prepared_path:
+ val_set_size: 0
+ output_dir: /workspace/Noromaid-v0.1-mixtral-8x7b
+ resume_from_checkpoint:
+ hf_use_auth_token:
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 16384
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - k_proj
+   - v_proj
+   - o_proj
+   - w1
+   - w2
+   - w3
+ lora_target_linear:
+ lora_fan_in_fan_out:
+ lora_modules_to_save:
+   - embed_tokens
+   - lm_head
+
+ wandb_project: Noromaid-v0.1-mixtral-8x7b
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 1
+ micro_batch_size: 2
+ num_epochs: 2
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ model_config:
+   output_router_logits: true
+
+ train_on_inputs:
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ loss_watchdog_threshold: 5.0
+ loss_watchdog_patience: 3
+
+ warmup_steps: 10
+ evals_per_epoch: 4
+ eval_table_size:
+ eval_table_max_new_tokens: 128
+ saves_per_epoch: 1
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
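
For reference, the two dataset "type" values in the config above expect differently shaped records. The sketch below shows a minimal example of each shape, assuming axolotl's default field names; the strings and output file names are illustrative, not taken from this commit.

    import json

    # "completion" records carry a single free-form text field;
    # axolotl trains on the raw text directly.
    completion_record = {"text": "Once upon a time, ..."}

    # "sharegpt" records carry a list of alternating turns; the
    # "conversation: alpaca" option in the config renders these
    # turns with the Alpaca prompt template at tokenization time.
    sharegpt_record = {
        "conversations": [
            {"from": "human", "value": "Write a short greeting."},
            {"from": "gpt", "value": "Hello! How can I help you today?"},
        ]
    }

    # Write one example of each shape to JSONL (hypothetical paths).
    with open("example_completion.jsonl", "w") as f:
        f.write(json.dumps(completion_record) + "\n")
    with open("example_sharegpt.jsonl", "w") as f:
        f.write(json.dumps(sharegpt_record) + "\n")

A config like this is normally handed straight to axolotl's trainer, e.g. "accelerate launch -m axolotl.cli.train Noromaid-8x7B.yml" (assuming the standard axolotl CLI; the /workspace dataset paths would need to exist first). With gradient_accumulation_steps: 1 and micro_batch_size: 2, each step sees 2 packed sequences of up to 16384 tokens per GPU, and output_router_logits: true lets Mixtral's router load-balancing auxiliary loss contribute during training.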