kennylam committed on
Commit
0e08b7b
1 Parent(s): bdf3a76

First commit for axolotl-config.yml

Browse files
Files changed (1) hide show
  1. axolotl-config.yml +143 -0
axolotl-config.yml ADDED
@@ -0,0 +1,143 @@
base_model: MediaTek-Research/Breeze-7B-Base-v0_1
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_mistral_derived_model: true

load_in_8bit: false
load_in_4bit: true
strict: false


datasets:
  - path: hon9kon9ize/yue-alpaca
    type: alpaca
  - path: indiejoseph/wikipedia-translate-zhhk-zhcn
    type:
      system_prompt: ""
      field_instruction: zh
      field_output: yue
      format: |-
        [INST]
        翻譯下面中文至粵語廣東話(Cantonese)。

        {instruction}
        [/INST]
  - path: indiejoseph/wikipedia-zh-yue-summaries
    type:
      system_prompt: ""
      field_instruction: content
      field_output: summary
      format: |-
        [INST]
        用粵語廣東話(Cantonese)總結一吓。

        {instruction}
        [/INST]
  - path: indiejoseph/wikipedia-zh-yue-summaries
    type:
      system_prompt: ""
      field_instruction: content
      field_output: title
      format: |-
        [INST]
        粵語廣東話(Cantonese), 呢篇嘢主題係咩?

        {instruction}
        [/INST]
  - path: indiejoseph/wikipedia-zh-yue-summaries
    type:
      system_prompt: ""
      field_instruction: content
      field_output: category
      format: |-
        [INST]
        粵語廣東話(Cantonese), 呢篇嘢講緊咩? 係咩分類?

        {instruction}
        [/INST]
  - path: indiejoseph/wikipedia-zh-yue-qa
    type:
      system_prompt: ""
      field_instruction: question
      field_system: title
      field_output: answer
      format: |-
        [INST]
        粵語廣東話(Cantonese), 以下係關於「{system}」嘅問題。

        {instruction}
        [/INST]

dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: loras/Breeze-7B-Cantonese-v0.1
save_safetensors: true
#eval_sample_packing: False

## You can optionally freeze the entire model and unfreeze a subset of parameters
unfrozen_parameters:
# - lm_head.*
# - model.embed_tokens.*
# - model.layers.2[0-9]+.block_sparse_moe.gate.*
# - model.layers.2[0-9]+.block_sparse_moe.experts.*
# - model.layers.3[0-9]+.block_sparse_moe.gate.*
# - model.layers.3[0-9]+.block_sparse_moe.experts.*

model_config:
  output_router_logits: true

adapter: qlora
lora_model_dir:

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:


wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens: