Training in progress, step 126, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 191968
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e861928e1b2ca38a4a6746866d7d8f7b7ca001e137c5b880cdc7ca389cae56a0
|
3 |
size 191968
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 253144
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d926fce7eee99bf36a4de34f618c75332ae83e4fbcc18a94351c99ca4b7722e1
|
3 |
size 253144
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5043a9d527c42b332561475083f4469c87b7663281bbf8ae1e7c09b5adbc61a
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9350fe25e75c3aec8e0f08fabcc3ea69e0fa51f62eea810c9a733906e4363f8
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 21,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -300,6 +300,63 @@
|
|
300 |
"eval_samples_per_second": 515.557,
|
301 |
"eval_steps_per_second": 66.286,
|
302 |
"step": 105
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
}
|
304 |
],
|
305 |
"logging_steps": 3,
|
@@ -319,7 +376,7 @@
|
|
319 |
"attributes": {}
|
320 |
}
|
321 |
},
|
322 |
-
"total_flos":
|
323 |
"train_batch_size": 8,
|
324 |
"trial_name": null,
|
325 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.525679758308157,
|
5 |
"eval_steps": 21,
|
6 |
+
"global_step": 126,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
300 |
"eval_samples_per_second": 515.557,
|
301 |
"eval_steps_per_second": 66.286,
|
302 |
"step": 105
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"epoch": 1.308157099697885,
|
306 |
+
"grad_norm": 0.21600359678268433,
|
307 |
+
"learning_rate": 6.394324377647028e-05,
|
308 |
+
"loss": 10.1603,
|
309 |
+
"step": 108
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 1.3444108761329305,
|
313 |
+
"grad_norm": 0.24075965583324432,
|
314 |
+
"learning_rate": 6.203940082845144e-05,
|
315 |
+
"loss": 10.0864,
|
316 |
+
"step": 111
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"epoch": 1.3806646525679758,
|
320 |
+
"grad_norm": 0.25287488102912903,
|
321 |
+
"learning_rate": 6.011683834586473e-05,
|
322 |
+
"loss": 10.6661,
|
323 |
+
"step": 114
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 1.4169184290030212,
|
327 |
+
"grad_norm": 0.2387695461511612,
|
328 |
+
"learning_rate": 5.8178545636514145e-05,
|
329 |
+
"loss": 9.6976,
|
330 |
+
"step": 117
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"epoch": 1.4531722054380665,
|
334 |
+
"grad_norm": 0.21192365884780884,
|
335 |
+
"learning_rate": 5.622753646644102e-05,
|
336 |
+
"loss": 10.451,
|
337 |
+
"step": 120
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"epoch": 1.4894259818731117,
|
341 |
+
"grad_norm": 0.18546977639198303,
|
342 |
+
"learning_rate": 5.426684437395196e-05,
|
343 |
+
"loss": 10.2875,
|
344 |
+
"step": 123
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"epoch": 1.525679758308157,
|
348 |
+
"grad_norm": 0.2497938573360443,
|
349 |
+
"learning_rate": 5.229951795290353e-05,
|
350 |
+
"loss": 10.3627,
|
351 |
+
"step": 126
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"epoch": 1.525679758308157,
|
355 |
+
"eval_loss": 10.205331802368164,
|
356 |
+
"eval_runtime": 0.2653,
|
357 |
+
"eval_samples_per_second": 527.718,
|
358 |
+
"eval_steps_per_second": 67.849,
|
359 |
+
"step": 126
|
360 |
}
|
361 |
],
|
362 |
"logging_steps": 3,
|
|
|
376 |
"attributes": {}
|
377 |
}
|
378 |
},
|
379 |
+
"total_flos": 14055383236608.0,
|
380 |
"train_batch_size": 8,
|
381 |
"trial_name": null,
|
382 |
"trial_params": null
|