Training in progress, step 150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3380768360
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a6d8f0f796537366c6e78ca004befff9f9c27672a628bae1e611c8bc0f94c8c
|
3 |
size 3380768360
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1757899449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffd75d21ac4ac0a8645a72715f91e4d5f09c05dd5a2548ed04a8b49d623fc3a5
|
3 |
size 1757899449
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5dc9af15ae765cffc21eeb6ddbc68a2629e47a5fc5164b3c35695e55c025ec4
|
3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:213d03f95061a3291403e8d5572036299f2f6f739be51135e2941aff4f3ccff7
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 1.
|
6 |
"eval_steps": 30,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -215,6 +215,56 @@
|
|
215 |
"eval_samples_per_second": 0.291,
|
216 |
"eval_steps_per_second": 0.073,
|
217 |
"step": 120
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
}
|
219 |
],
|
220 |
"logging_steps": 5,
|
@@ -234,7 +284,7 @@
|
|
234 |
"attributes": {}
|
235 |
}
|
236 |
},
|
237 |
-
"total_flos":
|
238 |
"train_batch_size": 8,
|
239 |
"trial_name": null,
|
240 |
"trial_params": null
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 1.5644599303135889,
|
6 |
"eval_steps": 30,
|
7 |
+
"global_step": 150,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
215 |
"eval_samples_per_second": 0.291,
|
216 |
"eval_steps_per_second": 0.073,
|
217 |
"step": 120
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"epoch": 1.3031358885017421,
|
221 |
+
"grad_norm": 0.07795720547437668,
|
222 |
+
"learning_rate": 2.950898376017064e-05,
|
223 |
+
"loss": 0.1842,
|
224 |
+
"step": 125
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"epoch": 1.3554006968641115,
|
228 |
+
"grad_norm": 0.07542526721954346,
|
229 |
+
"learning_rate": 2.573490187344596e-05,
|
230 |
+
"loss": 0.2031,
|
231 |
+
"step": 130
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"epoch": 1.4076655052264808,
|
235 |
+
"grad_norm": 0.10047340393066406,
|
236 |
+
"learning_rate": 2.2133776843878186e-05,
|
237 |
+
"loss": 0.24,
|
238 |
+
"step": 135
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"epoch": 1.4599303135888502,
|
242 |
+
"grad_norm": 0.13595731556415558,
|
243 |
+
"learning_rate": 1.873127678391816e-05,
|
244 |
+
"loss": 0.2808,
|
245 |
+
"step": 140
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"epoch": 1.5121951219512195,
|
249 |
+
"grad_norm": 0.06210995092988014,
|
250 |
+
"learning_rate": 1.555165404621567e-05,
|
251 |
+
"loss": 0.235,
|
252 |
+
"step": 145
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"epoch": 1.5644599303135889,
|
256 |
+
"grad_norm": 0.08401988446712494,
|
257 |
+
"learning_rate": 1.2617572357609564e-05,
|
258 |
+
"loss": 0.1849,
|
259 |
+
"step": 150
|
260 |
+
},
|
261 |
+
{
|
262 |
+
"epoch": 1.5644599303135889,
|
263 |
+
"eval_loss": 0.23435795307159424,
|
264 |
+
"eval_runtime": 1753.006,
|
265 |
+
"eval_samples_per_second": 0.291,
|
266 |
+
"eval_steps_per_second": 0.073,
|
267 |
+
"step": 150
|
268 |
}
|
269 |
],
|
270 |
"logging_steps": 5,
|
|
|
284 |
"attributes": {}
|
285 |
}
|
286 |
},
|
287 |
+
"total_flos": 9.840854969157304e+17,
|
288 |
"train_batch_size": 8,
|
289 |
"trial_name": null,
|
290 |
"trial_params": null
|