aguglaniAI committed • Commit 1ef0ea5 • 1 Parent(s): 72bbc42
phi2 fine-tuned with full dataset and high learning rate: Loss dropped to 0.02
README.md
CHANGED
@@ -20,12 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the generator dataset.
 It achieves the following results on the evaluation set:
-- eval_loss:
-- eval_runtime: 107.7441
-- eval_samples_per_second: 4.093
-- eval_steps_per_second: 0.52
-- epoch: 8.69
-- step: 120
+- Loss: 0.8105
 
 ## Model description
 
@@ -44,7 +39,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.
+- learning_rate: 0.0008
 - train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
@@ -52,14 +47,48 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
--
-
-
+- training_steps: 300
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 0.6408        | 0.72  | 10   | 0.5720          |
+| 0.4116        | 1.45  | 20   | 0.5234          |
+| 0.3467        | 2.17  | 30   | 0.5068          |
+| 0.328         | 2.9   | 40   | 0.4990          |
+| 0.3013        | 3.62  | 50   | 0.5022          |
+| 0.267         | 4.34  | 60   | 0.5051          |
+| 0.2407        | 5.07  | 70   | 0.5151          |
+| 0.2084        | 5.79  | 80   | 0.5329          |
+| 0.1821        | 6.52  | 90   | 0.5566          |
+| 0.1635        | 7.24  | 100  | 0.5996          |
+| 0.1431        | 7.96  | 110  | 0.6137          |
+| 0.1164        | 8.69  | 120  | 0.6461          |
+| 0.1045        | 9.41  | 130  | 0.6714          |
+| 0.0903        | 10.14 | 140  | 0.6719          |
+| 0.0773        | 10.86 | 150  | 0.6802          |
+| 0.0653        | 11.58 | 160  | 0.7234          |
+| 0.0595        | 12.31 | 170  | 0.7497          |
+| 0.0523        | 13.03 | 180  | 0.7281          |
+| 0.0453        | 13.76 | 190  | 0.7439          |
+| 0.0405        | 14.48 | 200  | 0.7655          |
+| 0.0363        | 15.2  | 210  | 0.7674          |
+| 0.0323        | 15.93 | 220  | 0.7835          |
+| 0.0293        | 16.65 | 230  | 0.7924          |
+| 0.0276        | 17.38 | 240  | 0.7981          |
+| 0.0257        | 18.1  | 250  | 0.8023          |
+| 0.0252        | 18.82 | 260  | 0.8019          |
+| 0.0236        | 19.55 | 270  | 0.8040          |
+| 0.023         | 20.27 | 280  | 0.8089          |
+| 0.0232        | 21.0  | 290  | 0.8104          |
+| 0.0231        | 21.72 | 300  | 0.8105          |
+
 
 ### Framework versions
 
-- PEFT 0.
-- Transformers 4.
+- PEFT 0.9.0
+- Transformers 4.38.1
 - Pytorch 2.1.0+cu121
 - Datasets 2.17.1
 - Tokenizers 0.15.2
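The rewritten card is specific enough to sketch the trainer configuration behind it. Below is a minimal, hedged reconstruction assuming the standard `transformers` `Trainer`: only the bulleted hyperparameters come from the README, while `output_dir`, the gradient-accumulation factor (2 × 16 = 32 matches `total_train_batch_size`), and the eval/logging cadence (every 10 steps, inferred from the results table) are assumptions.

```python
# Hedged reconstruction of the run described in the card above.
# Only learning_rate, batch sizes, seed, scheduler, optimizer, and
# training_steps come from the README; everything else is an assumption.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="phi2-finetuned",      # assumption: dir not named in the card
    learning_rate=8e-4,               # - learning_rate: 0.0008
    per_device_train_batch_size=2,    # - train_batch_size: 2
    per_device_eval_batch_size=8,     # - eval_batch_size: 8
    gradient_accumulation_steps=16,   # assumption: 2 * 16 = total batch 32
    max_steps=300,                    # - training_steps: 300
    lr_scheduler_type="cosine",       # - lr_scheduler_type: cosine
    seed=42,                          # - seed: 42
    optim="adamw_torch",              # Adam, betas=(0.9, 0.999), eps=1e-08
    evaluation_strategy="steps",      # assumption: table evals every 10 steps
    eval_steps=10,
    logging_steps=10,
)
```

The default `optim="adamw_torch"` appears to be what the auto-generated "Adam with betas=(0.9,0.999) and epsilon=1e-08" line in the card describes.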
adapter_config.json
CHANGED
@@ -19,10 +19,11 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "Wqkv",
     "out_proj",
-    "fc2",
-    "Wqkv"
+    "fc2"
   ],
   "task_type": "CAUSAL_LM",
+  "use_dora": false,
   "use_rslora": false
 }
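The edit reorders `target_modules` and adds the `use_dora` field that PEFT 0.9.0 began serializing. A matching `LoraConfig` might look like the sketch below; `r` and `lora_alpha` are placeholders, since the diff does not show those fields.

```python
# Sketch of a LoraConfig consistent with the adapter_config.json diff.
# r and lora_alpha are placeholders; the diff shows only the fields below.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,                                        # placeholder rank
    lora_alpha=64,                               # placeholder scaling
    target_modules=["Wqkv", "out_proj", "fc2"],  # phi-2 attention + MLP layers
    task_type="CAUSAL_LM",
    use_rslora=False,
    use_dora=False,  # field newly written by PEFT 0.9.0
)
```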
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fad900d7eb93efe7de0ab4a684293b9198811e87718b03058e5284454f20476e
 size 115368408
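The adapter weights file is a ~115 MB safetensors blob tracked by Git LFS; only its pointer changes in this commit. A minimal loading sketch follows; the repo id `aguglaniAI/phi2-finetuned` is a placeholder, since the commit page does not show the repository name.

```python
# Minimal sketch of loading the fine-tuned adapter on top of the base model.
# "aguglaniAI/phi2-finetuned" is a placeholder repository id.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2", torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(base, "aguglaniAI/phi2-finetuned")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
```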
runs/Feb29_04-59-36_20d285a465bc/events.out.tfevents.1709182786.20d285a465bc.1395.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5038892394c4370c275493d1a404d4c7266c23e12901bfea8166291eeee5a917
+size 19943
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:91d6ac12a727c0b9c1d4b98747443200eb0a8c818028b5474df01a06342cfd0b
+size 4920
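`training_args.bin`, like the other binaries above, is stored as a three-line Git LFS pointer (`version`, `oid`, `size`). A small sketch, with hypothetical file paths, of checking a downloaded blob against its pointer:

```python
# Verify a local blob against a git-lfs pointer file (paths are hypothetical).
import hashlib
import pathlib

def matches_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    # Pointer files are "key value" lines: version, oid sha256:<hex>, size <bytes>.
    fields = dict(
        line.split(" ", 1)
        for line in pathlib.Path(pointer_path).read_text().splitlines()
        if line.strip()
    )
    blob = pathlib.Path(blob_path).read_bytes()
    return (
        fields["oid"] == "sha256:" + hashlib.sha256(blob).hexdigest()
        and int(fields["size"]) == len(blob)
    )

# e.g. matches_lfs_pointer("training_args.bin.pointer", "training_args.bin")
```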