Undi95 committed
Commit: 19918ab
Parent(s): f27954e

Upload Noromaid-8x7B-v3.yml

Files changed (1)
  Noromaid-8x7B-v3.yml  +92 -0
Noromaid-8x7B-v3.yml ADDED
@@ -0,0 +1,92 @@
+base_model: mistralai/Mixtral-8x7B-v0.1
+model_type: MixtralForCausalLM
+tokenizer_type: LlamaTokenizer
+is_mistral_derived_model: false
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: /workspace/dataset/Aesir
+    type: completion
+  - path: /workspace/dataset/LimaRP
+    type: completion
+  - path: /workspace/dataset/no-robots-alpaca-completion-train.json
+    type: completion
+  - path: /workspace/dataset/toxicsharegpt-NoWarning-completion.jsonl
+    type: completion
+
+dataset_prepared_path:
+val_set_size: 0
+output_dir: /workspace/Noromaid-v0.1-mixtral-8x7b
+resume_from_checkpoint:
+hf_use_auth_token:
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 16384
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - k_proj
+  - v_proj
+  - o_proj
+  - w1
+  - w2
+  - w3
+lora_target_linear:
+lora_fan_in_fan_out:
+lora_modules_to_save:
+  - embed_tokens
+  - lm_head
+
+wandb_project: Noromaid-v0.1-mixtral-8x7b-v3
+wandb_entity:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+
+gradient_accumulation_steps: 1
+micro_batch_size: 2
+num_epochs: 3
+optimizer: paged_adamw_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+model_config:
+  output_router_logits: true
+
+train_on_inputs:
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+loss_watchdog_threshold: 5.0
+loss_watchdog_patience: 3
+
+warmup_steps: 10
+evals_per_epoch: 4
+eval_table_size:
+eval_table_max_new_tokens: 128
+saves_per_epoch: 2
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
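
For readers less familiar with axolotl-style configs, the sketch below shows roughly equivalent quantization and LoRA settings expressed directly with transformers/peft. It is illustrative only, not the training code behind this commit; the NF4 quant type and bfloat16 compute dtype are assumptions (the YAML only sets load_in_4bit: true and bf16: true).

import torch
from transformers import BitsAndBytesConfig
from peft import LoraConfig

# 4-bit quantization, mirroring load_in_4bit: true in the YAML.
# NF4 + bfloat16 compute are assumed defaults, not stated in the config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# LoRA adapter mirroring lora_r / lora_alpha / lora_dropout and the
# attention plus MoE expert projections (w1, w2, w3) in lora_target_modules.
lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "w1", "w2", "w3"],
    modules_to_save=["embed_tokens", "lm_head"],
    task_type="CAUSAL_LM",
)

Targeting the expert projections (w1, w2, w3) in addition to the attention projections means the adapter also trains the MoE feed-forward experts, and output_router_logits: true enables Mixtral's auxiliary router load-balancing loss during training.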