End of training
Browse files
- README.md +21 -6
- adapter_model.bin +1 -1
README.md CHANGED
@@ -26,6 +26,9 @@ load_in_8bit: false
 load_in_4bit: true
 strict: false
 
+data_seed: 42
+seed: 42
+
 datasets:
   - path: data/isaf_press_releases_ft.jsonl
     conversation: alpaca
@@ -64,7 +67,7 @@ wandb_log_model:
 
 gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs:
+num_epochs: 4
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002
@@ -106,7 +109,7 @@ special_tokens:
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.0288
 
 ## Model description
 
@@ -137,16 +140,28 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs:
+- num_epochs: 4
 
 ### Training results
 
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | 1.3462        | 0.0292 | 1    | 1.3536          |
-| 0.
-| 0.
-| 0.
+| 0.1245        | 0.2628 | 9    | 0.0958          |
+| 0.0521        | 0.5255 | 18   | 0.0523          |
+| 0.0437        | 0.7883 | 27   | 0.0420          |
+| 0.0312        | 1.0292 | 36   | 0.0383          |
+| 0.0395        | 1.2920 | 45   | 0.0351          |
+| 0.0309        | 1.5547 | 54   | 0.0329          |
+| 0.0342        | 1.8175 | 63   | 0.0314          |
+| 0.0334        | 2.0511 | 72   | 0.0318          |
+| 0.0282        | 2.3139 | 81   | 0.0322          |
+| 0.0263        | 2.5766 | 90   | 0.0301          |
+| 0.0255        | 2.8394 | 99   | 0.0294          |
+| 0.021         | 3.0803 | 108  | 0.0289          |
+| 0.0236        | 3.3431 | 117  | 0.0289          |
+| 0.0196        | 3.6058 | 126  | 0.0288          |
+| 0.0228        | 3.8686 | 135  | 0.0288          |
 
 
 ### Framework versions
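
The diff above completes the model card for a LoRA fine-tune of Mistral-7B-v0.1: with gradient_accumulation_steps: 4 and micro_batch_size: 2, each optimizer step sees an effective batch of 8 examples per device, and the now-recorded 4-epoch run brings validation loss from 1.3536 down to 0.0288. As a usage note, the sketch below shows one plausible way to attach the resulting adapter with peft; the adapter repo id is a placeholder, not this repository's actual name, and prompts would presumably follow the alpaca format named in the config.

```python
# Minimal sketch: load the base model, then attach the fine-tuned LoRA adapter.
# "your-username/isaf-press-releases-adapter" is a placeholder repo id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# adapter_model.bin (the LoRA weights changed in this commit) lives in the adapter repo.
model = PeftModel.from_pretrained(base, "your-username/isaf-press-releases-adapter")
model.eval()

inputs = tokenizer(
    "### Instruction:\nSummarise the press release.\n\n### Response:\n",
    return_tensors="pt",
).to(base.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=128)[0],
                       skip_special_tokens=True))
```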
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:226abc5664ceb9d1b6b0db5a67a7a5f11c76e51be8d38e8d47612048bff3da1c
 size 335706186
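
The pointer change above records the sha256 oid for the retrained adapter weights; a Git LFS pointer stores only the hash and byte size, not the payload itself. A small sketch, assuming the real file has been downloaded locally, for checking a copy of adapter_model.bin against this pointer:

```python
# Verify a downloaded adapter_model.bin against the LFS pointer's oid and size.
import hashlib
import os

path = "adapter_model.bin"  # assumed local download path
expected_oid = "226abc5664ceb9d1b6b0db5a67a7a5f11c76e51be8d38e8d47612048bff3da1c"
expected_size = 335706186

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")
```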