# File size: 2,971 Bytes
# 0e08b7b
# --- Base checkpoint and the classes named for loading it ---
base_model: MediaTek-Research/Breeze-7B-Base-v0_1
is_mistral_derived_model: true
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer

# --- Weight quantization: 4-bit enabled, 8-bit disabled ---
load_in_4bit: true
load_in_8bit: false

# Strict mode is switched off.
strict: false


# Training data sources. Each entry names a dataset path and a prompt type;
# the type is either a name or an inline mapping describing a custom template.
datasets:
# Selects the `alpaca` prompt type by name (no inline template).
- path: hon9kon9ize/yue-alpaca
  type: alpaca
# Translation task: column `zh` fills {instruction}, column `yue` is the target.
- path: indiejoseph/wikipedia-translate-zhhk-zhcn
  type:
    system_prompt: ""
    field_instruction: zh
    field_output: yue
    # Template for rows that carry a non-empty input column.
    format: |-
      [INST]
      翻譯下面中文至粵語廣東話(Cantonese)。

      {instruction}
      [/INST]
    # Per axolotl's custom prompt format docs, rows without an input value
    # are rendered with `no_input_format` (default "{instruction} "), not
    # `format`. No `field_input` is mapped here, so the template is mirrored
    # to make sure the [INST] wrapper is actually applied.
    no_input_format: |-
      [INST]
      翻譯下面中文至粵語廣東話(Cantonese)。

      {instruction}
      [/INST]
# Summarisation task: column `content` fills {instruction}, `summary` is the target.
- path: indiejoseph/wikipedia-zh-yue-summaries
  type:
    system_prompt: ""
    field_instruction: content
    field_output: summary
    # Template for rows that carry a non-empty input column.
    format: |-
      [INST]
      用粵語廣東話(Cantonese)總結一吓。

      {instruction}
      [/INST]
    # Per axolotl's custom prompt format docs, rows without an input value
    # are rendered with `no_input_format` (default "{instruction} "), not
    # `format`. No `field_input` is mapped here, so the template is mirrored
    # to make sure the [INST] wrapper is actually applied.
    no_input_format: |-
      [INST]
      用粵語廣東話(Cantonese)總結一吓。

      {instruction}
      [/INST]
# Title task: column `content` fills {instruction}, `title` is the target.
- path: indiejoseph/wikipedia-zh-yue-summaries
  type:
    system_prompt: ""
    field_instruction: content
    field_output: title
    # Template for rows that carry a non-empty input column.
    format: |-
      [INST]
      粵語廣東話(Cantonese), 呢篇嘢主題係咩?

      {instruction}
      [/INST]
    # Per axolotl's custom prompt format docs, rows without an input value
    # are rendered with `no_input_format` (default "{instruction} "), not
    # `format`. No `field_input` is mapped here, so the template is mirrored
    # to make sure the [INST] wrapper is actually applied.
    no_input_format: |-
      [INST]
      粵語廣東話(Cantonese), 呢篇嘢主題係咩?

      {instruction}
      [/INST]
# Category task: column `content` fills {instruction}, `category` is the target.
- path: indiejoseph/wikipedia-zh-yue-summaries
  type:
    system_prompt: ""
    field_instruction: content
    field_output: category
    # Template for rows that carry a non-empty input column.
    format: |-
      [INST]
      粵語廣東話(Cantonese), 呢篇嘢講緊咩? 係咩分類?

      {instruction}
      [/INST]
    # Per axolotl's custom prompt format docs, rows without an input value
    # are rendered with `no_input_format` (default "{instruction} "), not
    # `format`. No `field_input` is mapped here, so the template is mirrored
    # to make sure the [INST] wrapper is actually applied.
    no_input_format: |-
      [INST]
      粵語廣東話(Cantonese), 呢篇嘢講緊咩? 係咩分類?

      {instruction}
      [/INST]
# Q&A task: `question` fills {instruction}, `title` fills {system},
# `answer` is the target.
- path: indiejoseph/wikipedia-zh-yue-qa
  type:
    system_prompt: ""
    field_instruction: question
    field_system: title
    field_output: answer
    # Per axolotl's custom prompt format docs, {system} is substituted only
    # in `system_format`; the turn templates below are filled with
    # {instruction} (and {input}) alone, so a {system} placeholder inside
    # them is never resolved. The title sentence therefore lives here.
    # The rendered prompt is system_format + turn template, which yields:
    #   [INST]\n<title sentence>\n\n<question>\n[/INST]
    # NOTE(review): if a row has an empty `title`, the system part (and with
    # it the opening [INST]) is presumably skipped — confirm the data never
    # has empty titles.
    system_format: "[INST]\n粵語廣東話(Cantonese), 以下係關於「{system}」嘅問題。\n\n"
    # Template for rows that carry a non-empty input column.
    format: |-
      {instruction}
      [/INST]
    # Template for rows without an input value (no `field_input` is mapped
    # here, so this is the one expected to be used).
    no_input_format: |-
      {instruction}
      [/INST]

# --- Prepared-data location and validation set size ---
val_set_size: 0.05
dataset_prepared_path: last_run_prepared
# Disabled override, kept for reference:
# eval_sample_packing: False

# --- Output location and serialization format ---
output_dir: loras/Breeze-7B-Cantonese-v0.1
save_safetensors: true

## You can optionally freeze the entire model and unfreeze a subset of parameters
# NOTE(review): the key below has no value and every pattern under it is
# commented out, so no parameter patterns are listed. The `block_sparse_moe`
# patterns look like leftovers from a mixture-of-experts template, while
# `model_type` in this file is MistralForCausalLM — confirm before enabling.
unfrozen_parameters:
#  - lm_head.*
#  - model.embed_tokens.*
#  - model.layers.2[0-9]+.block_sparse_moe.gate.*
#  - model.layers.2[0-9]+.block_sparse_moe.experts.*
#  - model.layers.3[0-9]+.block_sparse_moe.gate.*
#  - model.layers.3[0-9]+.block_sparse_moe.experts.*

# Overrides applied to the model's configuration.
# NOTE(review): `output_router_logits` also looks MoE-specific — confirm it
# is meaningful for the model configured in this file.
model_config:
  output_router_logits: true

# --- Adapter type; no existing adapter directory is given ---
adapter: qlora
lora_model_dir:

# --- LoRA hyperparameters ---
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:

# --- Sequence handling: length 4096, packing and padding enabled ---
sequence_len: 4096
pad_to_sequence_len: true
sample_packing: true


# --- Weights & Biases settings: every field is left unset ---
wandb_project:
wandb_entity:
wandb_name:
wandb_watch:
wandb_log_model:

# --- Batching and number of epochs ---
micro_batch_size: 2
gradient_accumulation_steps: 4
num_epochs: 1

# --- Optimizer and learning-rate schedule ---
optimizer: adamw_bnb_8bit
learning_rate: 0.0002
lr_scheduler: cosine

# --- Sample handling flags (both off) ---
train_on_inputs: false
group_by_length: false

# --- Numeric precision: only bf16 is enabled ---
bf16: true
fp16: false
tf32: false

# --- Checkpointing and attention: flash attention on, xformers unset ---
gradient_checkpointing: true
flash_attention: true
xformers_attention:

# --- Run control: log every step; the remaining fields are unset ---
logging_steps: 1
early_stopping_patience:
resume_from_checkpoint:
local_rank:

# --- Loss watchdog ---
loss_watchdog_patience: 3
loss_watchdog_threshold: 5.0

# --- Warmup and weight decay ---
warmup_steps: 10
weight_decay: 0.0

# --- Evaluation and save cadence ---
evals_per_epoch: 4
saves_per_epoch: 1
eval_table_size:
eval_table_max_new_tokens: 128

# --- Debug / DeepSpeed / FSDP settings (all unset) ---
debug:
deepspeed:
fsdp:
fsdp_config:

# --- Special-token overrides (none given) ---
special_tokens: