Training in progress, epoch 2
Browse files
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0a863081a7849bacfb0250cc678e4f23cab38f8be22f19e40b3475762b28c98
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffc5cca4cb239a32b0e68bc0471df1ac1388b955fe5fbbf72538835f5b3eca31
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dce2dec849d7319a76a385b2dfc6a325ebf5460cea4073dba0f1e764cf692028
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4cee7f510d78dae0c5e00ce755e738f1e9816421f0dd2ae2238327d2b883e88
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -167,3 +167,84 @@
|
|
167 |
{"current_steps": 167, "total_steps": 249, "loss": 1.361, "lr": 2.95777871532002e-06, "epoch": 2.004, "percentage": 67.07, "elapsed_time": "2:37:08", "remaining_time": "1:17:09"}
|
168 |
{"current_steps": 168, "total_steps": 249, "loss": 0.7807, "lr": 2.893972885805148e-06, "epoch": 2.016, "percentage": 67.47, "elapsed_time": "2:37:56", "remaining_time": "1:16:09"}
|
169 |
{"current_steps": 169, "total_steps": 249, "loss": 0.7914, "lr": 2.83058130441221e-06, "epoch": 2.028, "percentage": 67.87, "elapsed_time": "2:39:00", "remaining_time": "1:15:16"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
{"current_steps": 167, "total_steps": 249, "loss": 1.361, "lr": 2.95777871532002e-06, "epoch": 2.004, "percentage": 67.07, "elapsed_time": "2:37:08", "remaining_time": "1:17:09"}
|
168 |
{"current_steps": 168, "total_steps": 249, "loss": 0.7807, "lr": 2.893972885805148e-06, "epoch": 2.016, "percentage": 67.47, "elapsed_time": "2:37:56", "remaining_time": "1:16:09"}
|
169 |
{"current_steps": 169, "total_steps": 249, "loss": 0.7914, "lr": 2.83058130441221e-06, "epoch": 2.028, "percentage": 67.87, "elapsed_time": "2:39:00", "remaining_time": "1:15:16"}
|
170 |
+
{"current_steps": 170, "total_steps": 249, "loss": 0.8962, "lr": 2.7676164400421864e-06, "epoch": 2.04, "percentage": 68.27, "elapsed_time": "2:40:02", "remaining_time": "1:14:22"}
|
171 |
+
{"current_steps": 171, "total_steps": 249, "loss": 0.7885, "lr": 2.705090677662311e-06, "epoch": 2.052, "percentage": 68.67, "elapsed_time": "2:41:02", "remaining_time": "1:13:27"}
|
172 |
+
{"current_steps": 172, "total_steps": 249, "loss": 0.6953, "lr": 2.6430163158700116e-06, "epoch": 2.064, "percentage": 69.08, "elapsed_time": "2:41:54", "remaining_time": "1:12:29"}
|
173 |
+
{"current_steps": 173, "total_steps": 249, "loss": 0.8564, "lr": 2.5814055644738013e-06, "epoch": 2.076, "percentage": 69.48, "elapsed_time": "2:42:50", "remaining_time": "1:11:32"}
|
174 |
+
{"current_steps": 174, "total_steps": 249, "loss": 0.7943, "lr": 2.520270542091663e-06, "epoch": 2.088, "percentage": 69.88, "elapsed_time": "2:43:42", "remaining_time": "1:10:33"}
|
175 |
+
{"current_steps": 175, "total_steps": 249, "loss": 0.7948, "lr": 2.4596232737673544e-06, "epoch": 2.1, "percentage": 70.28, "elapsed_time": "2:44:40", "remaining_time": "1:09:38"}
|
176 |
+
{"current_steps": 176, "total_steps": 249, "loss": 0.8502, "lr": 2.3994756886051267e-06, "epoch": 2.112, "percentage": 70.68, "elapsed_time": "2:45:42", "remaining_time": "1:08:43"}
|
177 |
+
{"current_steps": 177, "total_steps": 249, "loss": 0.7862, "lr": 2.339839617423318e-06, "epoch": 2.124, "percentage": 71.08, "elapsed_time": "2:46:41", "remaining_time": "1:07:48"}
|
178 |
+
{"current_steps": 178, "total_steps": 249, "loss": 0.8887, "lr": 2.280726790427258e-06, "epoch": 2.136, "percentage": 71.49, "elapsed_time": "2:47:48", "remaining_time": "1:06:56"}
|
179 |
+
{"current_steps": 179, "total_steps": 249, "loss": 0.813, "lr": 2.2221488349019903e-06, "epoch": 2.148, "percentage": 71.89, "elapsed_time": "2:48:50", "remaining_time": "1:06:01"}
|
180 |
+
{"current_steps": 180, "total_steps": 249, "loss": 0.7921, "lr": 2.1641172729252206e-06, "epoch": 2.16, "percentage": 72.29, "elapsed_time": "2:49:52", "remaining_time": "1:05:06"}
|
181 |
+
{"current_steps": 181, "total_steps": 249, "loss": 0.8221, "lr": 2.1066435191009717e-06, "epoch": 2.172, "percentage": 72.69, "elapsed_time": "2:50:41", "remaining_time": "1:04:07"}
|
182 |
+
{"current_steps": 182, "total_steps": 249, "loss": 0.7736, "lr": 2.04973887831436e-06, "epoch": 2.184, "percentage": 73.09, "elapsed_time": "2:51:37", "remaining_time": "1:03:10"}
|
183 |
+
{"current_steps": 183, "total_steps": 249, "loss": 0.8011, "lr": 1.9934145435079705e-06, "epoch": 2.196, "percentage": 73.49, "elapsed_time": "2:52:35", "remaining_time": "1:02:14"}
|
184 |
+
{"current_steps": 184, "total_steps": 249, "loss": 0.8939, "lr": 1.9376815934802496e-06, "epoch": 2.208, "percentage": 73.9, "elapsed_time": "2:53:33", "remaining_time": "1:01:18"}
|
185 |
+
{"current_steps": 185, "total_steps": 249, "loss": 0.7347, "lr": 1.8825509907063328e-06, "epoch": 2.22, "percentage": 74.3, "elapsed_time": "2:54:28", "remaining_time": "1:00:21"}
|
186 |
+
{"current_steps": 186, "total_steps": 249, "loss": 0.8875, "lr": 1.8280335791817733e-06, "epoch": 2.232, "percentage": 74.7, "elapsed_time": "2:55:25", "remaining_time": "0:59:25"}
|
187 |
+
{"current_steps": 187, "total_steps": 249, "loss": 0.7385, "lr": 1.7741400822895633e-06, "epoch": 2.2439999999999998, "percentage": 75.1, "elapsed_time": "2:56:18", "remaining_time": "0:58:27"}
|
188 |
+
{"current_steps": 188, "total_steps": 249, "loss": 0.8053, "lr": 1.7208811006908798e-06, "epoch": 2.2560000000000002, "percentage": 75.5, "elapsed_time": "2:57:16", "remaining_time": "0:57:31"}
|
189 |
+
{"current_steps": 189, "total_steps": 249, "loss": 0.8803, "lr": 1.6682671102399806e-06, "epoch": 2.268, "percentage": 75.9, "elapsed_time": "2:58:21", "remaining_time": "0:56:37"}
|
190 |
+
{"current_steps": 190, "total_steps": 249, "loss": 0.7583, "lr": 1.6163084599236278e-06, "epoch": 2.2800000000000002, "percentage": 76.31, "elapsed_time": "2:59:19", "remaining_time": "0:55:41"}
|
191 |
+
{"current_steps": 191, "total_steps": 249, "loss": 0.8772, "lr": 1.5650153698254916e-06, "epoch": 2.292, "percentage": 76.71, "elapsed_time": "3:00:24", "remaining_time": "0:54:47"}
|
192 |
+
{"current_steps": 192, "total_steps": 249, "loss": 0.72, "lr": 1.514397929115884e-06, "epoch": 2.304, "percentage": 77.11, "elapsed_time": "3:01:20", "remaining_time": "0:53:50"}
|
193 |
+
{"current_steps": 193, "total_steps": 249, "loss": 0.8059, "lr": 1.4644660940672628e-06, "epoch": 2.316, "percentage": 77.51, "elapsed_time": "3:02:16", "remaining_time": "0:52:53"}
|
194 |
+
{"current_steps": 194, "total_steps": 249, "loss": 0.7998, "lr": 1.4152296860958641e-06, "epoch": 2.328, "percentage": 77.91, "elapsed_time": "3:03:16", "remaining_time": "0:51:57"}
|
195 |
+
{"current_steps": 195, "total_steps": 249, "loss": 0.7974, "lr": 1.3666983898298659e-06, "epoch": 2.34, "percentage": 78.31, "elapsed_time": "3:04:11", "remaining_time": "0:51:00"}
|
196 |
+
{"current_steps": 196, "total_steps": 249, "loss": 0.7687, "lr": 1.3188817512044544e-06, "epoch": 2.352, "percentage": 78.71, "elapsed_time": "3:05:04", "remaining_time": "0:50:02"}
|
197 |
+
{"current_steps": 197, "total_steps": 249, "loss": 0.9035, "lr": 1.2717891755841722e-06, "epoch": 2.364, "percentage": 79.12, "elapsed_time": "3:06:00", "remaining_time": "0:49:05"}
|
198 |
+
{"current_steps": 198, "total_steps": 249, "loss": 0.7542, "lr": 1.225429925912921e-06, "epoch": 2.376, "percentage": 79.52, "elapsed_time": "3:06:52", "remaining_time": "0:48:07"}
|
199 |
+
{"current_steps": 199, "total_steps": 249, "loss": 0.8088, "lr": 1.1798131208919628e-06, "epoch": 2.388, "percentage": 79.92, "elapsed_time": "3:07:46", "remaining_time": "0:47:10"}
|
200 |
+
{"current_steps": 200, "total_steps": 249, "loss": 0.6584, "lr": 1.134947733186315e-06, "epoch": 2.4, "percentage": 80.32, "elapsed_time": "3:08:34", "remaining_time": "0:46:11"}
|
201 |
+
{"current_steps": 201, "total_steps": 249, "loss": 0.8569, "lr": 1.0908425876598512e-06, "epoch": 2.412, "percentage": 80.72, "elapsed_time": "3:09:33", "remaining_time": "0:45:15"}
|
202 |
+
{"current_steps": 202, "total_steps": 249, "loss": 0.8836, "lr": 1.047506359639483e-06, "epoch": 2.424, "percentage": 81.12, "elapsed_time": "3:10:30", "remaining_time": "0:44:19"}
|
203 |
+
{"current_steps": 203, "total_steps": 249, "loss": 0.7372, "lr": 1.004947573208756e-06, "epoch": 2.436, "percentage": 81.53, "elapsed_time": "3:11:20", "remaining_time": "0:43:21"}
|
204 |
+
{"current_steps": 204, "total_steps": 249, "loss": 0.7824, "lr": 9.631745995311881e-07, "epoch": 2.448, "percentage": 81.93, "elapsed_time": "3:12:03", "remaining_time": "0:42:21"}
|
205 |
+
{"current_steps": 205, "total_steps": 249, "loss": 0.7465, "lr": 9.221956552036992e-07, "epoch": 2.46, "percentage": 82.33, "elapsed_time": "3:12:59", "remaining_time": "0:41:25"}
|
206 |
+
{"current_steps": 206, "total_steps": 249, "loss": 0.694, "lr": 8.820188006404268e-07, "epoch": 2.472, "percentage": 82.73, "elapsed_time": "3:13:52", "remaining_time": "0:40:28"}
|
207 |
+
{"current_steps": 207, "total_steps": 249, "loss": 0.8708, "lr": 8.426519384872733e-07, "epoch": 2.484, "percentage": 83.13, "elapsed_time": "3:14:56", "remaining_time": "0:39:33"}
|
208 |
+
{"current_steps": 208, "total_steps": 249, "loss": 0.7514, "lr": 8.041028120674894e-07, "epoch": 2.496, "percentage": 83.53, "elapsed_time": "3:15:42", "remaining_time": "0:38:34"}
|
209 |
+
{"current_steps": 209, "total_steps": 249, "loss": 0.8477, "lr": 7.663790038585794e-07, "epoch": 2.508, "percentage": 83.94, "elapsed_time": "3:16:38", "remaining_time": "0:37:38"}
|
210 |
+
{"current_steps": 210, "total_steps": 249, "loss": 0.8247, "lr": 7.294879340008632e-07, "epoch": 2.52, "percentage": 84.34, "elapsed_time": "3:17:38", "remaining_time": "0:36:42"}
|
211 |
+
{"current_steps": 211, "total_steps": 249, "loss": 0.8271, "lr": 6.934368588379553e-07, "epoch": 2.532, "percentage": 84.74, "elapsed_time": "3:18:37", "remaining_time": "0:35:46"}
|
212 |
+
{"current_steps": 212, "total_steps": 249, "loss": 0.8894, "lr": 6.582328694894729e-07, "epoch": 2.544, "percentage": 85.14, "elapsed_time": "3:19:33", "remaining_time": "0:34:49"}
|
213 |
+
{"current_steps": 213, "total_steps": 249, "loss": 0.7195, "lr": 6.238828904562316e-07, "epoch": 2.556, "percentage": 85.54, "elapsed_time": "3:20:24", "remaining_time": "0:33:52"}
|
214 |
+
{"current_steps": 214, "total_steps": 249, "loss": 0.7907, "lr": 5.903936782582253e-07, "epoch": 2.568, "percentage": 85.94, "elapsed_time": "3:21:19", "remaining_time": "0:32:55"}
|
215 |
+
{"current_steps": 215, "total_steps": 249, "loss": 0.8158, "lr": 5.577718201056392e-07, "epoch": 2.58, "percentage": 86.35, "elapsed_time": "3:22:23", "remaining_time": "0:32:00"}
|
216 |
+
{"current_steps": 216, "total_steps": 249, "loss": 0.7774, "lr": 5.260237326031698e-07, "epoch": 2.592, "percentage": 86.75, "elapsed_time": "3:23:13", "remaining_time": "0:31:02"}
|
217 |
+
{"current_steps": 217, "total_steps": 249, "loss": 0.7719, "lr": 4.951556604879049e-07, "epoch": 2.604, "percentage": 87.15, "elapsed_time": "3:24:07", "remaining_time": "0:30:06"}
|
218 |
+
{"current_steps": 218, "total_steps": 249, "loss": 0.7448, "lr": 4.651736754009972e-07, "epoch": 2.616, "percentage": 87.55, "elapsed_time": "3:24:57", "remaining_time": "0:29:08"}
|
219 |
+
{"current_steps": 219, "total_steps": 249, "loss": 0.9587, "lr": 4.3608367469340553e-07, "epoch": 2.628, "percentage": 87.95, "elapsed_time": "3:25:56", "remaining_time": "0:28:12"}
|
220 |
+
{"current_steps": 220, "total_steps": 249, "loss": 0.7279, "lr": 4.078913802658946e-07, "epoch": 2.64, "percentage": 88.35, "elapsed_time": "3:26:43", "remaining_time": "0:27:15"}
|
221 |
+
{"current_steps": 221, "total_steps": 249, "loss": 0.7954, "lr": 3.8060233744356634e-07, "epoch": 2.652, "percentage": 88.76, "elapsed_time": "3:27:35", "remaining_time": "0:26:18"}
|
222 |
+
{"current_steps": 222, "total_steps": 249, "loss": 0.8308, "lr": 3.542219138851094e-07, "epoch": 2.664, "percentage": 89.16, "elapsed_time": "3:28:35", "remaining_time": "0:25:22"}
|
223 |
+
{"current_steps": 223, "total_steps": 249, "loss": 0.7808, "lr": 3.287552985270015e-07, "epoch": 2.676, "percentage": 89.56, "elapsed_time": "3:29:28", "remaining_time": "0:24:25"}
|
224 |
+
{"current_steps": 224, "total_steps": 249, "loss": 0.8398, "lr": 3.0420750056286195e-07, "epoch": 2.6879999999999997, "percentage": 89.96, "elapsed_time": "3:30:25", "remaining_time": "0:23:29"}
|
225 |
+
{"current_steps": 225, "total_steps": 249, "loss": 0.707, "lr": 2.8058334845816214e-07, "epoch": 2.7, "percentage": 90.36, "elapsed_time": "3:31:15", "remaining_time": "0:22:32"}
|
226 |
+
{"current_steps": 226, "total_steps": 249, "loss": 0.8001, "lr": 2.5788748900048676e-07, "epoch": 2.7119999999999997, "percentage": 90.76, "elapsed_time": "3:32:05", "remaining_time": "0:21:35"}
|
227 |
+
{"current_steps": 227, "total_steps": 249, "loss": 0.8116, "lr": 2.3612438638551837e-07, "epoch": 2.724, "percentage": 91.16, "elapsed_time": "3:33:07", "remaining_time": "0:20:39"}
|
228 |
+
{"current_steps": 228, "total_steps": 249, "loss": 0.776, "lr": 2.152983213389559e-07, "epoch": 2.7359999999999998, "percentage": 91.57, "elapsed_time": "3:33:57", "remaining_time": "0:19:42"}
|
229 |
+
{"current_steps": 229, "total_steps": 249, "loss": 0.8247, "lr": 1.9541339027450256e-07, "epoch": 2.748, "percentage": 91.97, "elapsed_time": "3:34:58", "remaining_time": "0:18:46"}
|
230 |
+
{"current_steps": 230, "total_steps": 249, "loss": 0.8348, "lr": 1.7647350448812105e-07, "epoch": 2.76, "percentage": 92.37, "elapsed_time": "3:35:54", "remaining_time": "0:17:50"}
|
231 |
+
{"current_steps": 231, "total_steps": 249, "loss": 0.7256, "lr": 1.5848238938869332e-07, "epoch": 2.7720000000000002, "percentage": 92.77, "elapsed_time": "3:36:54", "remaining_time": "0:16:54"}
|
232 |
+
{"current_steps": 232, "total_steps": 249, "loss": 0.8586, "lr": 1.4144358376524504e-07, "epoch": 2.784, "percentage": 93.17, "elapsed_time": "3:37:52", "remaining_time": "0:15:57"}
|
233 |
+
{"current_steps": 233, "total_steps": 249, "loss": 0.7834, "lr": 1.253604390908819e-07, "epoch": 2.7960000000000003, "percentage": 93.57, "elapsed_time": "3:38:53", "remaining_time": "0:15:01"}
|
234 |
+
{"current_steps": 234, "total_steps": 249, "loss": 0.8294, "lr": 1.10236118863562e-07, "epoch": 2.808, "percentage": 93.98, "elapsed_time": "3:39:42", "remaining_time": "0:14:05"}
|
235 |
+
{"current_steps": 235, "total_steps": 249, "loss": 0.7621, "lr": 9.607359798384785e-08, "epoch": 2.82, "percentage": 94.38, "elapsed_time": "3:40:33", "remaining_time": "0:13:08"}
|
236 |
+
{"current_steps": 236, "total_steps": 249, "loss": 0.8652, "lr": 8.287566216975795e-08, "epoch": 2.832, "percentage": 94.78, "elapsed_time": "3:41:26", "remaining_time": "0:12:11"}
|
237 |
+
{"current_steps": 237, "total_steps": 249, "loss": 0.7203, "lr": 7.064490740882057e-08, "epoch": 2.844, "percentage": 95.18, "elapsed_time": "3:42:23", "remaining_time": "0:11:15"}
|
238 |
+
{"current_steps": 238, "total_steps": 249, "loss": 0.8733, "lr": 5.938373944745612e-08, "epoch": 2.856, "percentage": 95.58, "elapsed_time": "3:43:17", "remaining_time": "0:10:19"}
|
239 |
+
{"current_steps": 239, "total_steps": 249, "loss": 0.7493, "lr": 4.909437331777178e-08, "epoch": 2.868, "percentage": 95.98, "elapsed_time": "3:44:10", "remaining_time": "0:09:22"}
|
240 |
+
{"current_steps": 240, "total_steps": 249, "loss": 0.7482, "lr": 3.977883290187667e-08, "epoch": 2.88, "percentage": 96.39, "elapsed_time": "3:45:05", "remaining_time": "0:08:26"}
|
241 |
+
{"current_steps": 241, "total_steps": 249, "loss": 0.8974, "lr": 3.143895053378698e-08, "epoch": 2.892, "percentage": 96.79, "elapsed_time": "3:46:01", "remaining_time": "0:07:30"}
|
242 |
+
{"current_steps": 242, "total_steps": 249, "loss": 0.7592, "lr": 2.4076366639015914e-08, "epoch": 2.904, "percentage": 97.19, "elapsed_time": "3:46:49", "remaining_time": "0:06:33"}
|
243 |
+
{"current_steps": 243, "total_steps": 249, "loss": 0.7935, "lr": 1.769252941190458e-08, "epoch": 2.916, "percentage": 97.59, "elapsed_time": "3:47:53", "remaining_time": "0:05:37"}
|
244 |
+
{"current_steps": 244, "total_steps": 249, "loss": 0.78, "lr": 1.2288694530769862e-08, "epoch": 2.928, "percentage": 97.99, "elapsed_time": "3:48:44", "remaining_time": "0:04:41"}
|
245 |
+
{"current_steps": 245, "total_steps": 249, "loss": 0.8765, "lr": 7.865924910916977e-09, "epoch": 2.94, "percentage": 98.39, "elapsed_time": "3:49:42", "remaining_time": "0:03:45"}
|
246 |
+
{"current_steps": 246, "total_steps": 249, "loss": 0.807, "lr": 4.4250904955656095e-09, "epoch": 2.952, "percentage": 98.8, "elapsed_time": "3:50:35", "remaining_time": "0:02:48"}
|
247 |
+
{"current_steps": 247, "total_steps": 249, "loss": 0.8633, "lr": 1.9668680847356735e-09, "epoch": 2.964, "percentage": 99.2, "elapsed_time": "3:51:34", "remaining_time": "0:01:52"}
|
248 |
+
{"current_steps": 248, "total_steps": 249, "loss": 0.7087, "lr": 4.91741202124918e-10, "epoch": 2.976, "percentage": 99.6, "elapsed_time": "3:52:33", "remaining_time": "0:00:56"}
|
249 |
+
{"current_steps": 249, "total_steps": 249, "loss": 0.9268, "lr": 0.0, "epoch": 2.988, "percentage": 100.0, "elapsed_time": "3:53:41", "remaining_time": "0:00:00"}
|
250 |
+
{"current_steps": 249, "total_steps": 249, "epoch": 2.988, "percentage": 100.0, "elapsed_time": "3:55:50", "remaining_time": "0:00:00"}
|