step 300
Browse files
log/debug_0.log
CHANGED
@@ -669,3 +669,69 @@ Mixed precision type: fp16
|
|
669 |
07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in my_checkpoint/sampler_1.bin
|
670 |
07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Gradient scaler state saved in my_checkpoint/scaler.pt
|
671 |
07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Random states saved in my_checkpoint/random_states_0.pkl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
669 |
07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in my_checkpoint/sampler_1.bin
|
670 |
07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Gradient scaler state saved in my_checkpoint/scaler.pt
|
671 |
07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Random states saved in my_checkpoint/random_states_0.pkl
|
672 |
+
07/25/2024 06:30:45 - WARNING - huggingface_hub.repository - Several commits (5) will be pushed upstream.
|
673 |
+
07/25/2024 06:30:45 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.
|
674 |
+
07/25/2024 06:31:13 - WARNING - huggingface_hub.repository - To https://huggingface.co/shng2025/gptesla-small
|
675 |
+
d02b805..4d31a9f celestial-aardvark-128 -> celestial-aardvark-128
|
676 |
+
|
677 |
+
07/25/2024 06:31:13 - INFO - __main__ - Step 251: {'lr': 0.00017857142857142857, 'samples': 12048, 'steps': 250, 'loss/train': 5.627201557159424}
|
678 |
+
07/25/2024 06:31:14 - INFO - __main__ - Step 252: {'lr': 0.0001792857142857143, 'samples': 12096, 'steps': 251, 'loss/train': 6.002392292022705}
|
679 |
+
07/25/2024 06:31:14 - INFO - __main__ - Step 253: {'lr': 0.00017999999999999998, 'samples': 12144, 'steps': 252, 'loss/train': 5.872100353240967}
|
680 |
+
07/25/2024 06:31:14 - INFO - __main__ - Step 254: {'lr': 0.00018071428571428573, 'samples': 12192, 'steps': 253, 'loss/train': 6.0609612464904785}
|
681 |
+
07/25/2024 06:31:14 - INFO - __main__ - Step 255: {'lr': 0.00018142857142857145, 'samples': 12240, 'steps': 254, 'loss/train': 6.275620460510254}
|
682 |
+
07/25/2024 06:31:15 - INFO - __main__ - Step 256: {'lr': 0.00018214285714285714, 'samples': 12288, 'steps': 255, 'loss/train': 6.78406286239624}
|
683 |
+
07/25/2024 06:31:15 - INFO - __main__ - Step 257: {'lr': 0.00018285714285714286, 'samples': 12336, 'steps': 256, 'loss/train': 6.069532871246338}
|
684 |
+
07/25/2024 06:31:15 - INFO - __main__ - Step 258: {'lr': 0.00018357142857142858, 'samples': 12384, 'steps': 257, 'loss/train': 5.567933559417725}
|
685 |
+
07/25/2024 06:31:16 - INFO - __main__ - Step 259: {'lr': 0.00018428571428571428, 'samples': 12432, 'steps': 258, 'loss/train': 6.152994632720947}
|
686 |
+
07/25/2024 06:31:16 - INFO - __main__ - Step 260: {'lr': 0.000185, 'samples': 12480, 'steps': 259, 'loss/train': 5.771788120269775}
|
687 |
+
07/25/2024 06:31:16 - INFO - __main__ - Step 261: {'lr': 0.00018571428571428572, 'samples': 12528, 'steps': 260, 'loss/train': 5.717995643615723}
|
688 |
+
07/25/2024 06:31:16 - INFO - __main__ - Step 262: {'lr': 0.0001864285714285714, 'samples': 12576, 'steps': 261, 'loss/train': 5.839302062988281}
|
689 |
+
07/25/2024 06:31:17 - INFO - __main__ - Step 263: {'lr': 0.00018714285714285713, 'samples': 12624, 'steps': 262, 'loss/train': 5.257016658782959}
|
690 |
+
07/25/2024 06:31:17 - INFO - __main__ - Step 264: {'lr': 0.00018785714285714288, 'samples': 12672, 'steps': 263, 'loss/train': 6.241714000701904}
|
691 |
+
07/25/2024 06:31:17 - INFO - __main__ - Step 265: {'lr': 0.0001885714285714286, 'samples': 12720, 'steps': 264, 'loss/train': 6.639944553375244}
|
692 |
+
07/25/2024 06:31:17 - INFO - __main__ - Step 266: {'lr': 0.0001892857142857143, 'samples': 12768, 'steps': 265, 'loss/train': 5.12101936340332}
|
693 |
+
07/25/2024 06:31:18 - INFO - __main__ - Step 267: {'lr': 0.00019, 'samples': 12816, 'steps': 266, 'loss/train': 5.190861701965332}
|
694 |
+
07/25/2024 06:31:18 - INFO - __main__ - Step 268: {'lr': 0.00019071428571428573, 'samples': 12864, 'steps': 267, 'loss/train': 6.486904621124268}
|
695 |
+
07/25/2024 06:31:18 - INFO - __main__ - Step 269: {'lr': 0.00019142857142857142, 'samples': 12912, 'steps': 268, 'loss/train': 5.638678073883057}
|
696 |
+
07/25/2024 06:31:19 - INFO - __main__ - Step 270: {'lr': 0.00019214285714285714, 'samples': 12960, 'steps': 269, 'loss/train': 5.088951110839844}
|
697 |
+
07/25/2024 06:31:19 - INFO - __main__ - Step 271: {'lr': 0.00019285714285714286, 'samples': 13008, 'steps': 270, 'loss/train': 5.137499809265137}
|
698 |
+
07/25/2024 06:31:19 - INFO - __main__ - Step 272: {'lr': 0.00019357142857142856, 'samples': 13056, 'steps': 271, 'loss/train': 4.604417324066162}
|
699 |
+
07/25/2024 06:31:19 - INFO - __main__ - Step 273: {'lr': 0.00019428571428571428, 'samples': 13104, 'steps': 272, 'loss/train': 5.781164646148682}
|
700 |
+
07/25/2024 06:31:20 - INFO - __main__ - Step 274: {'lr': 0.00019500000000000002, 'samples': 13152, 'steps': 273, 'loss/train': 6.4048309326171875}
|
701 |
+
07/25/2024 06:31:20 - INFO - __main__ - Step 275: {'lr': 0.00019571428571428572, 'samples': 13200, 'steps': 274, 'loss/train': 6.040492057800293}
|
702 |
+
07/25/2024 06:31:20 - INFO - __main__ - Step 276: {'lr': 0.00019642857142857144, 'samples': 13248, 'steps': 275, 'loss/train': 5.667052745819092}
|
703 |
+
07/25/2024 06:31:21 - INFO - __main__ - Step 277: {'lr': 0.00019714285714285716, 'samples': 13296, 'steps': 276, 'loss/train': 5.5247483253479}
|
704 |
+
07/25/2024 06:31:21 - INFO - __main__ - Step 278: {'lr': 0.00019785714285714288, 'samples': 13344, 'steps': 277, 'loss/train': 5.584035396575928}
|
705 |
+
07/25/2024 06:31:21 - INFO - __main__ - Step 279: {'lr': 0.00019857142857142857, 'samples': 13392, 'steps': 278, 'loss/train': 5.613864898681641}
|
706 |
+
07/25/2024 06:31:21 - INFO - __main__ - Step 280: {'lr': 0.0001992857142857143, 'samples': 13440, 'steps': 279, 'loss/train': 5.550878524780273}
|
707 |
+
07/25/2024 06:31:22 - INFO - __main__ - Step 281: {'lr': 0.0002, 'samples': 13488, 'steps': 280, 'loss/train': 6.560573101043701}
|
708 |
+
07/25/2024 06:31:22 - INFO - __main__ - Step 282: {'lr': 0.0002007142857142857, 'samples': 13536, 'steps': 281, 'loss/train': 5.38557767868042}
|
709 |
+
07/25/2024 06:31:22 - INFO - __main__ - Step 283: {'lr': 0.00020142857142857142, 'samples': 13584, 'steps': 282, 'loss/train': 6.759729862213135}
|
710 |
+
07/25/2024 06:31:23 - INFO - __main__ - Step 284: {'lr': 0.00020214285714285714, 'samples': 13632, 'steps': 283, 'loss/train': 6.179801940917969}
|
711 |
+
07/25/2024 06:31:23 - INFO - __main__ - Step 285: {'lr': 0.00020285714285714286, 'samples': 13680, 'steps': 284, 'loss/train': 5.904941082000732}
|
712 |
+
07/25/2024 06:31:23 - INFO - __main__ - Step 286: {'lr': 0.00020357142857142858, 'samples': 13728, 'steps': 285, 'loss/train': 5.76945161819458}
|
713 |
+
07/25/2024 06:31:23 - INFO - __main__ - Step 287: {'lr': 0.0002042857142857143, 'samples': 13776, 'steps': 286, 'loss/train': 8.2332124710083}
|
714 |
+
07/25/2024 06:31:24 - INFO - __main__ - Step 288: {'lr': 0.000205, 'samples': 13824, 'steps': 287, 'loss/train': 5.863339900970459}
|
715 |
+
07/25/2024 06:31:24 - INFO - __main__ - Step 289: {'lr': 0.00020571428571428572, 'samples': 13872, 'steps': 288, 'loss/train': 6.213030815124512}
|
716 |
+
07/25/2024 06:31:24 - INFO - __main__ - Step 290: {'lr': 0.00020642857142857144, 'samples': 13920, 'steps': 289, 'loss/train': 4.734172821044922}
|
717 |
+
07/25/2024 06:31:25 - INFO - __main__ - Step 291: {'lr': 0.00020714285714285716, 'samples': 13968, 'steps': 290, 'loss/train': 5.674801349639893}
|
718 |
+
07/25/2024 06:31:25 - INFO - __main__ - Step 292: {'lr': 0.00020785714285714285, 'samples': 14016, 'steps': 291, 'loss/train': 5.784888744354248}
|
719 |
+
07/25/2024 06:31:25 - INFO - __main__ - Step 293: {'lr': 0.00020857142857142857, 'samples': 14064, 'steps': 292, 'loss/train': 5.5319390296936035}
|
720 |
+
07/25/2024 06:31:25 - INFO - __main__ - Step 294: {'lr': 0.0002092857142857143, 'samples': 14112, 'steps': 293, 'loss/train': 5.685769557952881}
|
721 |
+
07/25/2024 06:31:26 - INFO - __main__ - Step 295: {'lr': 0.00021, 'samples': 14160, 'steps': 294, 'loss/train': 5.418774604797363}
|
722 |
+
07/25/2024 06:31:26 - INFO - __main__ - Step 296: {'lr': 0.00021071428571428573, 'samples': 14208, 'steps': 295, 'loss/train': 4.068847179412842}
|
723 |
+
07/25/2024 06:31:26 - INFO - __main__ - Step 297: {'lr': 0.00021142857142857145, 'samples': 14256, 'steps': 296, 'loss/train': 5.367792129516602}
|
724 |
+
07/25/2024 06:31:26 - INFO - __main__ - Step 298: {'lr': 0.00021214285714285714, 'samples': 14304, 'steps': 297, 'loss/train': 5.713776588439941}
|
725 |
+
07/25/2024 06:31:27 - INFO - __main__ - Step 299: {'lr': 0.00021285714285714286, 'samples': 14352, 'steps': 298, 'loss/train': 5.603511810302734}
|
726 |
+
07/25/2024 06:31:27 - INFO - __main__ - Step 300: {'lr': 0.00021357142857142858, 'samples': 14400, 'steps': 299, 'loss/train': 6.163950443267822}
|
727 |
+
07/25/2024 06:31:27 - INFO - __main__ - Evaluating and saving model checkpoint
|
728 |
+
07/25/2024 06:31:27 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards.
|
729 |
+
07/25/2024 06:31:31 - INFO - __main__ - Step 300: {'loss/eval': 5.79922342300415, 'perplexity': 330.0431823730469}
|
730 |
+
07/25/2024 06:31:31 - INFO - accelerate.accelerator - Saving current state to my_checkpoint
|
731 |
+
07/25/2024 06:31:31 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
|
732 |
+
07/25/2024 06:31:32 - INFO - accelerate.checkpointing - Model weights saved in my_checkpoint/model.safetensors
|
733 |
+
07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Optimizer state saved in my_checkpoint/optimizer.bin
|
734 |
+
07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in my_checkpoint/sampler.bin
|
735 |
+
07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in my_checkpoint/sampler_1.bin
|
736 |
+
07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Gradient scaler state saved in my_checkpoint/scaler.pt
|
737 |
+
07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Random states saved in my_checkpoint/random_states_0.pkl
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 444048000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f93616ef22d5bde85042d7711dec1a6fe76afa6d746ee923cf7094eca048dfc9
|
3 |
size 444048000
|
my_checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 444048000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f93616ef22d5bde85042d7711dec1a6fe76afa6d746ee923cf7094eca048dfc9
|
3 |
size 444048000
|
my_checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 888189882
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a77feb5639d39c77a88009157dd7606aa5b0f34ac6746f8901afc1b3a558005
|
3 |
size 888189882
|
my_checkpoint/random_states_0.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15124
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0d4ae399df9a4446265f8e15f732a6f7b24573fd9a66d7d000b6618f67bb52b
|
3 |
size 15124
|
my_checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7d64f60290dc935068c5ace097c4f499e1d20a78853407c3044c89457075acd
|
3 |
size 988
|
runs/Jul25_06-22-39_lab/events.out.tfevents.1721888559.lab.31151.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:362433dc92dfd664e0b8b11e20189e505a4dbb1b23290ab6b146fd02f50ca15d
|
3 |
+
size 54158
|