Update README.md
Browse files
README.md
CHANGED
@@ -19,32 +19,32 @@ Base Model: *upstage/SOLAR-10.7B-v1.0*
|
|
19 |
|
20 |
-num_epochs : 1
|
21 |
|
22 |
-
micro_batch : 1
|
23 |
|
24 |
-
gradient_accumulation_steps : batch_size // micro_batch
|
25 |
|
26 |
***hyper params II***
|
27 |
|
28 |
-
cutoff_len : 4096
|
29 |
|
30 |
-
lr_scheduler : 'cosine'
|
31 |
|
32 |
-
warmup_ratio : 0.06
|
33 |
|
34 |
-
learning_rate : 4e-4
|
35 |
|
36 |
-
optimizer : 'adamw_torch'
|
37 |
|
38 |
-
weight_decay : 0.01
|
39 |
|
40 |
-
max_grad_norm : 1.0
|
41 |
|
42 |
***LoRA config***
|
43 |
|
44 |
-
lora_r : 64
|
45 |
|
46 |
-
lora_alpha : 16
|
47 |
|
48 |
-
lora_dropout : 0.05
|
49 |
|
50 |
-
lora_target_modules : ["gate_proj", "down_proj", "up_proj"]
|
|
|
19 |
|
20 |
-num_epochs : 1
|
21 |
|
22 |
+
-micro_batch : 1
|
23 |
|
24 |
+
-gradient_accumulation_steps : batch_size // micro_batch
|
25 |
|
26 |
***hyper params II***
|
27 |
|
28 |
+
-cutoff_len : 4096
|
29 |
|
30 |
+
-lr_scheduler : 'cosine'
|
31 |
|
32 |
+
-warmup_ratio : 0.06
|
33 |
|
34 |
+
-learning_rate : 4e-4
|
35 |
|
36 |
+
-optimizer : 'adamw_torch'
|
37 |
|
38 |
+
-weight_decay : 0.01
|
39 |
|
40 |
+
-max_grad_norm : 1.0
|
41 |
|
42 |
***LoRA config***
|
43 |
|
44 |
+
-lora_r : 64
|
45 |
|
46 |
+
-lora_alpha : 16
|
47 |
|
48 |
+
-lora_dropout : 0.05
|
49 |
|
50 |
+
-lora_target_modules : ["gate_proj", "down_proj", "up_proj"]
|