shng2025 committed on
Commit aae7e8d
1 Parent(s): 4d31a9f
log/debug_0.log CHANGED
@@ -669,3 +669,69 @@ Mixed precision type: fp16
 07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in my_checkpoint/sampler_1.bin
 07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Gradient scaler state saved in my_checkpoint/scaler.pt
 07/25/2024 06:29:44 - INFO - accelerate.checkpointing - Random states saved in my_checkpoint/random_states_0.pkl
+ 07/25/2024 06:30:45 - WARNING - huggingface_hub.repository - Several commits (5) will be pushed upstream.
+ 07/25/2024 06:30:45 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.
+ 07/25/2024 06:31:13 - WARNING - huggingface_hub.repository - To https://huggingface.co/shng2025/gptesla-small
+ d02b805..4d31a9f celestial-aardvark-128 -> celestial-aardvark-128
+
+ 07/25/2024 06:31:13 - INFO - __main__ - Step 251: {'lr': 0.00017857142857142857, 'samples': 12048, 'steps': 250, 'loss/train': 5.627201557159424}
+ 07/25/2024 06:31:14 - INFO - __main__ - Step 252: {'lr': 0.0001792857142857143, 'samples': 12096, 'steps': 251, 'loss/train': 6.002392292022705}
+ 07/25/2024 06:31:14 - INFO - __main__ - Step 253: {'lr': 0.00017999999999999998, 'samples': 12144, 'steps': 252, 'loss/train': 5.872100353240967}
+ 07/25/2024 06:31:14 - INFO - __main__ - Step 254: {'lr': 0.00018071428571428573, 'samples': 12192, 'steps': 253, 'loss/train': 6.0609612464904785}
+ 07/25/2024 06:31:14 - INFO - __main__ - Step 255: {'lr': 0.00018142857142857145, 'samples': 12240, 'steps': 254, 'loss/train': 6.275620460510254}
+ 07/25/2024 06:31:15 - INFO - __main__ - Step 256: {'lr': 0.00018214285714285714, 'samples': 12288, 'steps': 255, 'loss/train': 6.78406286239624}
+ 07/25/2024 06:31:15 - INFO - __main__ - Step 257: {'lr': 0.00018285714285714286, 'samples': 12336, 'steps': 256, 'loss/train': 6.069532871246338}
+ 07/25/2024 06:31:15 - INFO - __main__ - Step 258: {'lr': 0.00018357142857142858, 'samples': 12384, 'steps': 257, 'loss/train': 5.567933559417725}
+ 07/25/2024 06:31:16 - INFO - __main__ - Step 259: {'lr': 0.00018428571428571428, 'samples': 12432, 'steps': 258, 'loss/train': 6.152994632720947}
+ 07/25/2024 06:31:16 - INFO - __main__ - Step 260: {'lr': 0.000185, 'samples': 12480, 'steps': 259, 'loss/train': 5.771788120269775}
+ 07/25/2024 06:31:16 - INFO - __main__ - Step 261: {'lr': 0.00018571428571428572, 'samples': 12528, 'steps': 260, 'loss/train': 5.717995643615723}
+ 07/25/2024 06:31:16 - INFO - __main__ - Step 262: {'lr': 0.0001864285714285714, 'samples': 12576, 'steps': 261, 'loss/train': 5.839302062988281}
+ 07/25/2024 06:31:17 - INFO - __main__ - Step 263: {'lr': 0.00018714285714285713, 'samples': 12624, 'steps': 262, 'loss/train': 5.257016658782959}
+ 07/25/2024 06:31:17 - INFO - __main__ - Step 264: {'lr': 0.00018785714285714288, 'samples': 12672, 'steps': 263, 'loss/train': 6.241714000701904}
+ 07/25/2024 06:31:17 - INFO - __main__ - Step 265: {'lr': 0.0001885714285714286, 'samples': 12720, 'steps': 264, 'loss/train': 6.639944553375244}
+ 07/25/2024 06:31:17 - INFO - __main__ - Step 266: {'lr': 0.0001892857142857143, 'samples': 12768, 'steps': 265, 'loss/train': 5.12101936340332}
+ 07/25/2024 06:31:18 - INFO - __main__ - Step 267: {'lr': 0.00019, 'samples': 12816, 'steps': 266, 'loss/train': 5.190861701965332}
+ 07/25/2024 06:31:18 - INFO - __main__ - Step 268: {'lr': 0.00019071428571428573, 'samples': 12864, 'steps': 267, 'loss/train': 6.486904621124268}
+ 07/25/2024 06:31:18 - INFO - __main__ - Step 269: {'lr': 0.00019142857142857142, 'samples': 12912, 'steps': 268, 'loss/train': 5.638678073883057}
+ 07/25/2024 06:31:19 - INFO - __main__ - Step 270: {'lr': 0.00019214285714285714, 'samples': 12960, 'steps': 269, 'loss/train': 5.088951110839844}
+ 07/25/2024 06:31:19 - INFO - __main__ - Step 271: {'lr': 0.00019285714285714286, 'samples': 13008, 'steps': 270, 'loss/train': 5.137499809265137}
+ 07/25/2024 06:31:19 - INFO - __main__ - Step 272: {'lr': 0.00019357142857142856, 'samples': 13056, 'steps': 271, 'loss/train': 4.604417324066162}
+ 07/25/2024 06:31:19 - INFO - __main__ - Step 273: {'lr': 0.00019428571428571428, 'samples': 13104, 'steps': 272, 'loss/train': 5.781164646148682}
+ 07/25/2024 06:31:20 - INFO - __main__ - Step 274: {'lr': 0.00019500000000000002, 'samples': 13152, 'steps': 273, 'loss/train': 6.4048309326171875}
+ 07/25/2024 06:31:20 - INFO - __main__ - Step 275: {'lr': 0.00019571428571428572, 'samples': 13200, 'steps': 274, 'loss/train': 6.040492057800293}
+ 07/25/2024 06:31:20 - INFO - __main__ - Step 276: {'lr': 0.00019642857142857144, 'samples': 13248, 'steps': 275, 'loss/train': 5.667052745819092}
+ 07/25/2024 06:31:21 - INFO - __main__ - Step 277: {'lr': 0.00019714285714285716, 'samples': 13296, 'steps': 276, 'loss/train': 5.5247483253479}
+ 07/25/2024 06:31:21 - INFO - __main__ - Step 278: {'lr': 0.00019785714285714288, 'samples': 13344, 'steps': 277, 'loss/train': 5.584035396575928}
+ 07/25/2024 06:31:21 - INFO - __main__ - Step 279: {'lr': 0.00019857142857142857, 'samples': 13392, 'steps': 278, 'loss/train': 5.613864898681641}
+ 07/25/2024 06:31:21 - INFO - __main__ - Step 280: {'lr': 0.0001992857142857143, 'samples': 13440, 'steps': 279, 'loss/train': 5.550878524780273}
+ 07/25/2024 06:31:22 - INFO - __main__ - Step 281: {'lr': 0.0002, 'samples': 13488, 'steps': 280, 'loss/train': 6.560573101043701}
+ 07/25/2024 06:31:22 - INFO - __main__ - Step 282: {'lr': 0.0002007142857142857, 'samples': 13536, 'steps': 281, 'loss/train': 5.38557767868042}
+ 07/25/2024 06:31:22 - INFO - __main__ - Step 283: {'lr': 0.00020142857142857142, 'samples': 13584, 'steps': 282, 'loss/train': 6.759729862213135}
+ 07/25/2024 06:31:23 - INFO - __main__ - Step 284: {'lr': 0.00020214285714285714, 'samples': 13632, 'steps': 283, 'loss/train': 6.179801940917969}
+ 07/25/2024 06:31:23 - INFO - __main__ - Step 285: {'lr': 0.00020285714285714286, 'samples': 13680, 'steps': 284, 'loss/train': 5.904941082000732}
+ 07/25/2024 06:31:23 - INFO - __main__ - Step 286: {'lr': 0.00020357142857142858, 'samples': 13728, 'steps': 285, 'loss/train': 5.76945161819458}
+ 07/25/2024 06:31:23 - INFO - __main__ - Step 287: {'lr': 0.0002042857142857143, 'samples': 13776, 'steps': 286, 'loss/train': 8.2332124710083}
+ 07/25/2024 06:31:24 - INFO - __main__ - Step 288: {'lr': 0.000205, 'samples': 13824, 'steps': 287, 'loss/train': 5.863339900970459}
+ 07/25/2024 06:31:24 - INFO - __main__ - Step 289: {'lr': 0.00020571428571428572, 'samples': 13872, 'steps': 288, 'loss/train': 6.213030815124512}
+ 07/25/2024 06:31:24 - INFO - __main__ - Step 290: {'lr': 0.00020642857142857144, 'samples': 13920, 'steps': 289, 'loss/train': 4.734172821044922}
+ 07/25/2024 06:31:25 - INFO - __main__ - Step 291: {'lr': 0.00020714285714285716, 'samples': 13968, 'steps': 290, 'loss/train': 5.674801349639893}
+ 07/25/2024 06:31:25 - INFO - __main__ - Step 292: {'lr': 0.00020785714285714285, 'samples': 14016, 'steps': 291, 'loss/train': 5.784888744354248}
+ 07/25/2024 06:31:25 - INFO - __main__ - Step 293: {'lr': 0.00020857142857142857, 'samples': 14064, 'steps': 292, 'loss/train': 5.5319390296936035}
+ 07/25/2024 06:31:25 - INFO - __main__ - Step 294: {'lr': 0.0002092857142857143, 'samples': 14112, 'steps': 293, 'loss/train': 5.685769557952881}
+ 07/25/2024 06:31:26 - INFO - __main__ - Step 295: {'lr': 0.00021, 'samples': 14160, 'steps': 294, 'loss/train': 5.418774604797363}
+ 07/25/2024 06:31:26 - INFO - __main__ - Step 296: {'lr': 0.00021071428571428573, 'samples': 14208, 'steps': 295, 'loss/train': 4.068847179412842}
+ 07/25/2024 06:31:26 - INFO - __main__ - Step 297: {'lr': 0.00021142857142857145, 'samples': 14256, 'steps': 296, 'loss/train': 5.367792129516602}
+ 07/25/2024 06:31:26 - INFO - __main__ - Step 298: {'lr': 0.00021214285714285714, 'samples': 14304, 'steps': 297, 'loss/train': 5.713776588439941}
+ 07/25/2024 06:31:27 - INFO - __main__ - Step 299: {'lr': 0.00021285714285714286, 'samples': 14352, 'steps': 298, 'loss/train': 5.603511810302734}
+ 07/25/2024 06:31:27 - INFO - __main__ - Step 300: {'lr': 0.00021357142857142858, 'samples': 14400, 'steps': 299, 'loss/train': 6.163950443267822}
+ 07/25/2024 06:31:27 - INFO - __main__ - Evaluating and saving model checkpoint
+ 07/25/2024 06:31:27 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards.
+ 07/25/2024 06:31:31 - INFO - __main__ - Step 300: {'loss/eval': 5.79922342300415, 'perplexity': 330.0431823730469}
+ 07/25/2024 06:31:31 - INFO - accelerate.accelerator - Saving current state to my_checkpoint
+ 07/25/2024 06:31:31 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
+ 07/25/2024 06:31:32 - INFO - accelerate.checkpointing - Model weights saved in my_checkpoint/model.safetensors
+ 07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Optimizer state saved in my_checkpoint/optimizer.bin
+ 07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in my_checkpoint/sampler.bin
+ 07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in my_checkpoint/sampler_1.bin
+ 07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Gradient scaler state saved in my_checkpoint/scaler.pt
+ 07/25/2024 06:31:33 - INFO - accelerate.checkpointing - Random states saved in my_checkpoint/random_states_0.pkl
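The hunk above ends with the step-300 evaluation (loss/eval 5.799, perplexity 330.04, i.e. exp(loss)) followed by the accelerate.checkpointing messages written into my_checkpoint/. The training script itself is not part of this commit, so the following is only a minimal sketch of the evaluate-and-save step that would emit log lines like these, assuming a standard 🤗 Accelerate loop over a transformers causal LM; the names evaluate, eval_dataloader, and the batch layout are illustrative, not taken from the repository.

```python
# Minimal sketch (not the repository's actual script) of an evaluate-and-save step
# that produces log lines like "Step 300: {'loss/eval': ..., 'perplexity': ...}" and
# the accelerate.checkpointing INFO messages above.
import logging

import torch
from accelerate import Accelerator

logger = logging.getLogger(__name__)
accelerator = Accelerator(mixed_precision="fp16")  # matches "Mixed precision type: fp16"


def evaluate(model, eval_dataloader):
    # Average the causal-LM loss over the eval set and report exp(loss) as perplexity.
    model.eval()
    losses = []
    for batch in eval_dataloader:  # assumes batches shaped like {"input_ids": ...}
        with torch.no_grad():
            outputs = model(batch["input_ids"], labels=batch["input_ids"])
        # Gather per-process losses so every rank computes the same mean.
        losses.append(accelerator.gather(outputs.loss.repeat(batch["input_ids"].shape[0])))
    loss = torch.cat(losses).mean()
    perplexity = torch.exp(loss)  # e.g. exp(5.7992...) ~= 330.04, as logged above
    return loss.item(), perplexity.item()


# Inside the training loop, at a checkpoint step (pseudo-usage):
#   logger.info("Evaluating and saving model checkpoint")
#   eval_loss, perplexity = evaluate(model, eval_dataloader)
#   logger.info(f"Step {step}: {{'loss/eval': {eval_loss}, 'perplexity': {perplexity}}}")
#   accelerator.save_state("my_checkpoint")  # writes model.safetensors, optimizer.bin,
#                                            # sampler*.bin, scaler.pt, random_states_0.pkl
```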
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:8ad0fd09b30c14ad746b6d038c941ff80e9650264689101b4a3a85e6147943c1
+ oid sha256:f93616ef22d5bde85042d7711dec1a6fe76afa6d746ee923cf7094eca048dfc9
 size 444048000
my_checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:8ad0fd09b30c14ad746b6d038c941ff80e9650264689101b4a3a85e6147943c1
+ oid sha256:f93616ef22d5bde85042d7711dec1a6fe76afa6d746ee923cf7094eca048dfc9
 size 444048000
my_checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5cc7f64faa5a43c57e85db63461de13d22249564bf77ab3360d8d4b48b1b8cac
+ oid sha256:7a77feb5639d39c77a88009157dd7606aa5b0f34ac6746f8901afc1b3a558005
 size 888189882
my_checkpoint/random_states_0.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:be7e616f50d1065fb1eed506a0f081bc60c75259c0c0a55c3effc8df4d41f12a
+ oid sha256:d0d4ae399df9a4446265f8e15f732a6f7b24573fd9a66d7d000b6618f67bb52b
 size 15124
my_checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4a544767a1c3ca06d376f956622d54d64e5f117ac7a8c9bd53e41b843854ad2c
+ oid sha256:b7d64f60290dc935068c5ace097c4f499e1d20a78853407c3044c89457075acd
 size 988
runs/Jul25_06-22-39_lab/events.out.tfevents.1721888559.lab.31151.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f53e62237209297b7d72acd143ad81fbe97fb0c0d0542f38bfc40e1a15c8a504
- size 45061
+ oid sha256:362433dc92dfd664e0b8b11e20189e505a4dbb1b23290ab6b146fd02f50ca15d
+ size 54158
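Every binary updated by this commit (model.safetensors, optimizer.bin, random_states_0.pkl, scaler.pt, the TensorBoard event file) is tracked with Git LFS, so each diff only touches the small pointer file: the oid sha256 line and, where the payload grew, the size line. A hypothetical helper (not part of this repository) for checking that a downloaded payload matches its pointer could look like this:

```python
# Hypothetical helper: verify a downloaded file against its Git LFS pointer.
# A pointer file has three lines, e.g.
#   version https://git-lfs.github.com/spec/v1
#   oid sha256:f93616ef22d5bde85042d7711dec1a6fe76afa6d746ee923cf7094eca048dfc9
#   size 444048000
import hashlib
from pathlib import Path


def parse_pointer(pointer_path: str) -> dict:
    # Split each "key value" line of the pointer into a dict.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields  # {'version': ..., 'oid': 'sha256:<hex>', 'size': '<bytes>'}


def verify(pointer_path: str, payload_path: str) -> bool:
    # True if the payload's size and SHA-256 digest match the pointer.
    fields = parse_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])
    payload = Path(payload_path)
    if payload.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with payload.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid


# Example usage (paths are illustrative):
#   verify("model.safetensors.pointer", "model.safetensors")  # -> True if they match
```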