hamel committed
Commit 1169ea0
1 Parent(s): 2e675f1

Create config/hc.yml

Files changed (1)
  1. config/hc.yml +89 -0
config/hc.yml ADDED
@@ -0,0 +1,89 @@
+ base_model: mistralai/Mistral-7B-v0.1
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+ is_mistral_derived_model: true
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ lora_fan_in_fan_out: false
+ data_seed: 49
+ seed: 49
+
+ datasets:
+   - path: _synth_data/alpaca_synth_queries_healed.jsonl
+     type: sharegpt
+     conversation: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.1
+ output_dir: ./qlora-alpaca-out
+ hub_model_id: hamel/hc-mistral-alpaca
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 896
+ sample_packing: false
+ pad_to_sequence_len: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+
+ wandb_project: hc-axolotl-mistral
+ wandb_entity: hamelsmu
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 16
+ eval_batch_size: 16
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+ max_grad_norm: 1.0
+ adam_beta2: 0.95
+ adam_epsilon: 0.00001
+ save_total_limit: 12
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ loss_watchdog_threshold: 5.0
+ loss_watchdog_patience: 3
+
+ warmup_steps: 20
+ evals_per_epoch: 4
+ eval_table_size:
+ eval_table_max_new_tokens: 128
+ saves_per_epoch: 6
+ debug:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+ save_safetensors: true
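
This config is meant to be passed to Axolotl's training entry point (for example, accelerate launch -m axolotl.cli.train config/hc.yml), which trains the QLoRA adapter and pushes it to the hub_model_id above. Below is a minimal inference sketch in Python, assuming the adapter and tokenizer were pushed to hamel/hc-mistral-alpaca as configured; the alpaca-style prompt string is an assumption based on the conversation: alpaca setting, not something fixed by this file.

    import torch
    from transformers import AutoTokenizer
    from peft import AutoPeftModelForCausalLM

    adapter_id = "hamel/hc-mistral-alpaca"  # hub_model_id from this config

    # Loads the Mistral-7B base model and applies the pushed QLoRA adapter on top.
    model = AutoPeftModelForCausalLM.from_pretrained(
        adapter_id, torch_dtype=torch.bfloat16, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(adapter_id)

    # Assumed alpaca-style prompt format, matching the sharegpt/alpaca dataset settings above.
    prompt = "### Instruction:\nSay hello.\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=128)
    print(tokenizer.decode(output[0], skip_special_tokens=True))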