thanhdaonguyen committed on
Commit
2dcb2fb
1 Parent(s): 2c1bf7e

Update config.yaml

Files changed (1)
  1. config.yaml +81 -0
config.yaml CHANGED
@@ -0,0 +1,81 @@
+ base_model: NousResearch/Hermes-2-Pro-Mistral-7B
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   # - path: roleplay4fun/20240327_pippa_segmented_long_experiment_01
+   #   type: input_output
+   - path: roleplay4fun/20240327_limarp_segmented_experiment_00
+     type: input_output
+   - path: roleplay4fun/20240327_aesir_segmented_experiment_00
+     type: input_output
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.0
+ output_dir: ./qlora-out-3
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+
+ lora_r: 64
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_name:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 16
+ micro_batch_size: 2
+ num_epochs: 5
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 5
+ xformers_attention:
+ flash_attention: true
+
+ loss_watchdog_threshold: 5.0
+ loss_watchdog_patience: 3
+
+ warmup_steps: 10
+ evals_per_epoch:
+ eval_table_size:
+ eval_max_new_tokens: 128
+ saves_per_epoch: 1
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
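
For context, a minimal sketch (not part of this commit) of how the QLoRA adapter this config trains into ./qlora-out-3 could be loaded afterwards with Hugging Face PEFT. The config filename passed to axolotl and the bfloat16 dtype are assumptions, not taken from the commit.

# A minimal sketch, assuming training was launched with axolotl, e.g.:
#   accelerate launch -m axolotl.cli.train config.yaml
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model matches base_model in the config above.
base = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Hermes-2-Pro-Mistral-7B",
    torch_dtype=torch.bfloat16,  # config says bf16: auto; bfloat16 is an assumption here
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Mistral-7B")

# Attach the trained LoRA adapter from the config's output_dir.
model = PeftModel.from_pretrained(base, "./qlora-out-3")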