Update README.md
README.md
@@ -128,21 +128,21 @@ dataset = dataset.map(lambda x: {"formatted_chat": tokenizer.apply_chat_template
 #### Training Hyperparameters
 
 SFT parameters:
--num_train_epochs=1
--per_device_train_batch_size=2
--gradient_accumulation_steps=2
--gradient_checkpointing=True
--optim="adamw_torch_fused"
--learning_rate=2e-4
--max_grad_norm=0.3
--warmup_ratio=0.01
--lr_scheduler_type="cosine"
--bf16=True
+- num_train_epochs=1
+- per_device_train_batch_size=2
+- gradient_accumulation_steps=2
+- gradient_checkpointing=True
+- optim="adamw_torch_fused"
+- learning_rate=2e-4
+- max_grad_norm=0.3
+- warmup_ratio=0.01
+- lr_scheduler_type="cosine"
+- bf16=True
 
 LORA parameters:
--rank_dimension = 6
--lora_alpha = 8
--lora_dropout = 0.05
+- rank_dimension = 6
+- lora_alpha = 8
+- lora_dropout = 0.05
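For context, here is a minimal sketch of how the hyperparameters listed in this hunk could be wired into trl's SFTConfig and peft's LoraConfig. This is an illustration, not the repository's actual training script: the model id, output_dir, and target_modules are placeholders not given in the README, and rank_dimension is assumed to map to LoraConfig's r argument.

```python
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

# LORA parameters from the README
peft_config = LoraConfig(
    r=6,                          # rank_dimension = 6
    lora_alpha=8,
    lora_dropout=0.05,
    target_modules="all-linear",  # assumption: not specified in the README
    task_type="CAUSAL_LM",
)

# SFT parameters from the README
training_args = SFTConfig(
    output_dir="sft-output",      # placeholder
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    max_grad_norm=0.3,
    warmup_ratio=0.01,
    lr_scheduler_type="cosine",
    bf16=True,
)

trainer = SFTTrainer(
    model="base-model-id",        # placeholder: the base model is not named in this hunk
    args=training_args,
    train_dataset=dataset,        # the dataset formatted with apply_chat_template above
    peft_config=peft_config,
)
trainer.train()
```

Note that with per_device_train_batch_size=2 and gradient_accumulation_steps=2, the effective batch size is 4 per device.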