Commit
•
ce2e36a
1
Parent(s):
b62a3ae
jeroenherczeg/shawgpt-ft
Browse files
README.md
CHANGED
@@ -44,111 +44,28 @@ The following hyperparameters were used during training:
|
|
44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
45 |
- lr_scheduler_type: linear
|
46 |
- lr_scheduler_warmup_steps: 2
|
47 |
-
- num_epochs: 100
|
48 |
|
49 |
### Training results
|
50 |
|
51 |
| Training Loss | Epoch | Step | Validation Loss |
|
52 |
|:-------------:|:-----:|:----:|:---------------:|
|
53 |
-
| 4.
|
54 |
-
| 4.
|
55 |
-
| 4.
|
56 |
-
| 3.
|
57 |
-
| 4.
|
58 |
-
| 4.
|
59 |
-
| 4.
|
60 |
-
| 3.
|
61 |
-
| 4.
|
62 |
-
|
|
63 |
-
| 4.6256 | 10.77 | 35 | 4.2320 |
|
64 |
-
| 3.4795 | 12.0 | 39 | 4.2320 |
|
65 |
-
| 4.6287 | 12.92 | 42 | 4.2320 |
|
66 |
-
| 4.6345 | 13.85 | 45 | 4.2320 |
|
67 |
-
| 4.6308 | 14.77 | 48 | 4.2320 |
|
68 |
-
| 3.4811 | 16.0 | 52 | 4.2320 |
|
69 |
-
| 4.6598 | 16.92 | 55 | 4.2320 |
|
70 |
-
| 4.6658 | 17.85 | 58 | 4.2320 |
|
71 |
-
| 4.6393 | 18.77 | 61 | 4.2320 |
|
72 |
-
| 3.4755 | 20.0 | 65 | 4.2320 |
|
73 |
-
| 4.6234 | 20.92 | 68 | 4.2320 |
|
74 |
-
| 4.6511 | 21.85 | 71 | 4.2320 |
|
75 |
-
| 4.6326 | 22.77 | 74 | 4.2320 |
|
76 |
-
| 3.4732 | 24.0 | 78 | 4.2320 |
|
77 |
-
| 4.6331 | 24.92 | 81 | 4.2320 |
|
78 |
-
| 4.6402 | 25.85 | 84 | 4.2320 |
|
79 |
-
| 4.6498 | 26.77 | 87 | 4.2320 |
|
80 |
-
| 3.4715 | 28.0 | 91 | 4.2320 |
|
81 |
-
| 4.665 | 28.92 | 94 | 4.2320 |
|
82 |
-
| 4.6558 | 29.85 | 97 | 4.2320 |
|
83 |
-
| 4.6404 | 30.77 | 100 | 4.2320 |
|
84 |
-
| 3.4852 | 32.0 | 104 | 4.2320 |
|
85 |
-
| 4.6258 | 32.92 | 107 | 4.2320 |
|
86 |
-
| 4.6543 | 33.85 | 110 | 4.2320 |
|
87 |
-
| 4.6502 | 34.77 | 113 | 4.2320 |
|
88 |
-
| 3.4824 | 36.0 | 117 | 4.2320 |
|
89 |
-
| 4.6669 | 36.92 | 120 | 4.2320 |
|
90 |
-
| 4.6597 | 37.85 | 123 | 4.2320 |
|
91 |
-
| 4.6377 | 38.77 | 126 | 4.2320 |
|
92 |
-
| 3.4882 | 40.0 | 130 | 4.2320 |
|
93 |
-
| 4.6538 | 40.92 | 133 | 4.2320 |
|
94 |
-
| 4.6345 | 41.85 | 136 | 4.2320 |
|
95 |
-
| 4.6379 | 42.77 | 139 | 4.2320 |
|
96 |
-
| 3.4759 | 44.0 | 143 | 4.2320 |
|
97 |
-
| 4.6265 | 44.92 | 146 | 4.2320 |
|
98 |
-
| 4.6388 | 45.85 | 149 | 4.2320 |
|
99 |
-
| 4.627 | 46.77 | 152 | 4.2320 |
|
100 |
-
| 3.4803 | 48.0 | 156 | 4.2320 |
|
101 |
-
| 4.6648 | 48.92 | 159 | 4.2320 |
|
102 |
-
| 4.6395 | 49.85 | 162 | 4.2320 |
|
103 |
-
| 4.6139 | 50.77 | 165 | 4.2320 |
|
104 |
-
| 3.4922 | 52.0 | 169 | 4.2320 |
|
105 |
-
| 4.637 | 52.92 | 172 | 4.2320 |
|
106 |
-
| 4.6463 | 53.85 | 175 | 4.2320 |
|
107 |
-
| 4.6362 | 54.77 | 178 | 4.2320 |
|
108 |
-
| 3.487 | 56.0 | 182 | 4.2320 |
|
109 |
-
| 4.6366 | 56.92 | 185 | 4.2320 |
|
110 |
-
| 4.6218 | 57.85 | 188 | 4.2320 |
|
111 |
-
| 4.6565 | 58.77 | 191 | 4.2320 |
|
112 |
-
| 3.5069 | 60.0 | 195 | 4.2320 |
|
113 |
-
| 4.6391 | 60.92 | 198 | 4.2320 |
|
114 |
-
| 4.6256 | 61.85 | 201 | 4.2320 |
|
115 |
-
| 4.6583 | 62.77 | 204 | 4.2320 |
|
116 |
-
| 3.4692 | 64.0 | 208 | 4.2320 |
|
117 |
-
| 4.6473 | 64.92 | 211 | 4.2320 |
|
118 |
-
| 4.6427 | 65.85 | 214 | 4.2320 |
|
119 |
-
| 4.6547 | 66.77 | 217 | 4.2320 |
|
120 |
-
| 3.4708 | 68.0 | 221 | 4.2320 |
|
121 |
-
| 4.6536 | 68.92 | 224 | 4.2320 |
|
122 |
-
| 4.6429 | 69.85 | 227 | 4.2320 |
|
123 |
-
| 4.634 | 70.77 | 230 | 4.2320 |
|
124 |
-
| 3.4907 | 72.0 | 234 | 4.2320 |
|
125 |
-
| 4.6467 | 72.92 | 237 | 4.2320 |
|
126 |
-
| 4.6311 | 73.85 | 240 | 4.2320 |
|
127 |
-
| 4.6493 | 74.77 | 243 | 4.2320 |
|
128 |
-
| 3.4631 | 76.0 | 247 | 4.2320 |
|
129 |
-
| 4.6688 | 76.92 | 250 | 4.2320 |
|
130 |
-
| 4.6465 | 77.85 | 253 | 4.2320 |
|
131 |
-
| 4.6302 | 78.77 | 256 | 4.2320 |
|
132 |
-
| 3.4796 | 80.0 | 260 | 4.2320 |
|
133 |
-
| 4.6425 | 80.92 | 263 | 4.2320 |
|
134 |
-
| 4.6324 | 81.85 | 266 | 4.2320 |
|
135 |
-
| 4.6696 | 82.77 | 269 | 4.2320 |
|
136 |
-
| 3.4871 | 84.0 | 273 | 4.2320 |
|
137 |
-
| 4.6406 | 84.92 | 276 | 4.2320 |
|
138 |
-
| 4.618 | 85.85 | 279 | 4.2320 |
|
139 |
-
| 4.6478 | 86.77 | 282 | 4.2320 |
|
140 |
-
| 3.4833 | 88.0 | 286 | 4.2320 |
|
141 |
-
| 4.6455 | 88.92 | 289 | 4.2320 |
|
142 |
-
| 4.6476 | 89.85 | 292 | 4.2320 |
|
143 |
-
| 4.6218 | 90.77 | 295 | 4.2320 |
|
144 |
-
| 3.4687 | 92.0 | 299 | 4.2320 |
|
145 |
-
| 4.283 | 92.31 | 300 | 4.2320 |
|
146 |
|
147 |
|
148 |
### Framework versions
|
149 |
|
150 |
- PEFT 0.10.0
|
151 |
-
- Transformers 4.
|
152 |
- Pytorch 2.1.0+cu121
|
153 |
- Datasets 2.18.0
|
154 |
- Tokenizers 0.15.2
|
|
|
44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
45 |
- lr_scheduler_type: linear
|
46 |
- lr_scheduler_warmup_steps: 2
|
47 |
+
- num_epochs: 10
|
48 |
|
49 |
### Training results
|
50 |
|
51 |
| Training Loss | Epoch | Step | Validation Loss |
|
52 |
|:-------------:|:-----:|:----:|:---------------:|
|
53 |
+
| 4.6433 | 0.92 | 3 | 4.2320 |
|
54 |
+
| 4.6544 | 1.85 | 6 | 4.2320 |
|
55 |
+
| 4.6459 | 2.77 | 9 | 4.2320 |
|
56 |
+
| 3.4822 | 4.0 | 13 | 4.2320 |
|
57 |
+
| 4.6298 | 4.92 | 16 | 4.2320 |
|
58 |
+
| 4.6605 | 5.85 | 19 | 4.2320 |
|
59 |
+
| 4.6392 | 6.77 | 22 | 4.2320 |
|
60 |
+
| 3.4844 | 8.0 | 26 | 4.2320 |
|
61 |
+
| 4.6305 | 8.92 | 29 | 4.2320 |
|
62 |
+
| 3.3623 | 9.23 | 30 | 4.2320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
|
65 |
### Framework versions
|
66 |
|
67 |
- PEFT 0.10.0
|
68 |
+
- Transformers 4.36.2
|
69 |
- Pytorch 2.1.0+cu121
|
70 |
- Datasets 2.18.0
|
71 |
- Tokenizers 0.15.2
|
runs/Apr05_09-52-57_EVAOS-1/events.out.tfevents.1712303577.EVAOS-1.444372.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76a785b3f0272cba8ce6b5ddc8de528a8840b28b6e4eae2b25f380d7f7346fd3
|
3 |
+
size 4951
|
runs/Apr05_09-54-12_EVAOS-1/events.out.tfevents.1712303653.EVAOS-1.453618.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfad24e4b8812678f5b8c24460d88e2688e173a50c0e852f23c7787747ac1f68
|
3 |
+
size 9499
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bf858ca08d1bff2109ddf6414ee2668f05186c26549b1860bb60e3182bddc1b
|
3 |
+
size 4664
|