step 100
Browse files
log/debug_0.log
CHANGED
@@ -833,3 +833,61 @@ Mixed precision type: fp16
|
|
833 |
07/24/2024 16:42:58 - INFO - __main__ - Evaluating and saving model checkpoint
|
834 |
07/24/2024 16:42:59 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards.
|
835 |
07/24/2024 16:43:02 - INFO - __main__ - Step 50: {'loss/eval': 7.611824989318848, 'perplexity': 2021.9647216796875}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
833 |
07/24/2024 16:42:58 - INFO - __main__ - Evaluating and saving model checkpoint
|
834 |
07/24/2024 16:42:59 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards.
|
835 |
07/24/2024 16:43:02 - INFO - __main__ - Step 50: {'loss/eval': 7.611824989318848, 'perplexity': 2021.9647216796875}
|
836 |
+
07/24/2024 16:43:18 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream.
|
837 |
+
07/24/2024 16:43:18 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.
|
838 |
+
07/24/2024 16:43:32 - WARNING - huggingface_hub.repository - To https://huggingface.co/shng2025/gptesla-small
|
839 |
+
4d63b0c..caa5803 cerulean-water-119 -> cerulean-water-119
|
840 |
+
|
841 |
+
07/24/2024 16:43:33 - INFO - __main__ - Step 51: {'lr': 3.571428571428571e-05, 'samples': 2448, 'steps': 50, 'loss/train': 6.487676620483398}
|
842 |
+
07/24/2024 16:43:33 - INFO - __main__ - Step 52: {'lr': 3.642857142857143e-05, 'samples': 2496, 'steps': 51, 'loss/train': 7.67896032333374}
|
843 |
+
07/24/2024 16:43:34 - INFO - __main__ - Step 53: {'lr': 3.7142857142857143e-05, 'samples': 2544, 'steps': 52, 'loss/train': 7.545412540435791}
|
844 |
+
07/24/2024 16:43:34 - INFO - __main__ - Step 54: {'lr': 3.7857142857142864e-05, 'samples': 2592, 'steps': 53, 'loss/train': 7.630044937133789}
|
845 |
+
07/24/2024 16:43:34 - INFO - __main__ - Step 55: {'lr': 3.857142857142857e-05, 'samples': 2640, 'steps': 54, 'loss/train': 7.491697311401367}
|
846 |
+
07/24/2024 16:43:34 - INFO - __main__ - Step 56: {'lr': 3.928571428571428e-05, 'samples': 2688, 'steps': 55, 'loss/train': 7.302150249481201}
|
847 |
+
07/24/2024 16:43:35 - INFO - __main__ - Step 57: {'lr': 4e-05, 'samples': 2736, 'steps': 56, 'loss/train': 7.448665142059326}
|
848 |
+
07/24/2024 16:43:35 - INFO - __main__ - Step 58: {'lr': 4.0714285714285717e-05, 'samples': 2784, 'steps': 57, 'loss/train': 6.885580062866211}
|
849 |
+
07/24/2024 16:43:35 - INFO - __main__ - Step 59: {'lr': 4.142857142857143e-05, 'samples': 2832, 'steps': 58, 'loss/train': 7.71057653427124}
|
850 |
+
07/24/2024 16:43:36 - INFO - __main__ - Step 60: {'lr': 4.214285714285714e-05, 'samples': 2880, 'steps': 59, 'loss/train': 7.289674282073975}
|
851 |
+
07/24/2024 16:43:36 - INFO - __main__ - Step 61: {'lr': 4.2857142857142856e-05, 'samples': 2928, 'steps': 60, 'loss/train': 7.388137340545654}
|
852 |
+
07/24/2024 16:43:36 - INFO - __main__ - Step 62: {'lr': 4.3571428571428576e-05, 'samples': 2976, 'steps': 61, 'loss/train': 7.354274749755859}
|
853 |
+
07/24/2024 16:43:36 - INFO - __main__ - Step 63: {'lr': 4.428571428571428e-05, 'samples': 3024, 'steps': 62, 'loss/train': 7.121933937072754}
|
854 |
+
07/24/2024 16:43:37 - INFO - __main__ - Step 64: {'lr': 4.4999999999999996e-05, 'samples': 3072, 'steps': 63, 'loss/train': 6.061006546020508}
|
855 |
+
07/24/2024 16:43:37 - INFO - __main__ - Step 65: {'lr': 4.5714285714285716e-05, 'samples': 3120, 'steps': 64, 'loss/train': 7.104621887207031}
|
856 |
+
07/24/2024 16:43:37 - INFO - __main__ - Step 66: {'lr': 4.642857142857143e-05, 'samples': 3168, 'steps': 65, 'loss/train': 6.724586486816406}
|
857 |
+
07/24/2024 16:43:38 - INFO - __main__ - Step 67: {'lr': 4.714285714285715e-05, 'samples': 3216, 'steps': 66, 'loss/train': 6.689899921417236}
|
858 |
+
07/24/2024 16:43:38 - INFO - __main__ - Step 68: {'lr': 4.7857142857142856e-05, 'samples': 3264, 'steps': 67, 'loss/train': 7.3340067863464355}
|
859 |
+
07/24/2024 16:43:38 - INFO - __main__ - Step 69: {'lr': 4.857142857142857e-05, 'samples': 3312, 'steps': 68, 'loss/train': 7.200557231903076}
|
860 |
+
07/24/2024 16:43:38 - INFO - __main__ - Step 70: {'lr': 4.928571428571429e-05, 'samples': 3360, 'steps': 69, 'loss/train': 6.917683124542236}
|
861 |
+
07/24/2024 16:43:39 - INFO - __main__ - Step 71: {'lr': 5e-05, 'samples': 3408, 'steps': 70, 'loss/train': 7.259862899780273}
|
862 |
+
07/24/2024 16:43:39 - INFO - __main__ - Step 72: {'lr': 5.0714285714285716e-05, 'samples': 3456, 'steps': 71, 'loss/train': 6.847894191741943}
|
863 |
+
07/24/2024 16:43:39 - INFO - __main__ - Step 73: {'lr': 5.142857142857143e-05, 'samples': 3504, 'steps': 72, 'loss/train': 7.104192733764648}
|
864 |
+
07/24/2024 16:43:40 - INFO - __main__ - Step 74: {'lr': 5.214285714285714e-05, 'samples': 3552, 'steps': 73, 'loss/train': 6.7482452392578125}
|
865 |
+
07/24/2024 16:43:40 - INFO - __main__ - Step 75: {'lr': 5.285714285714286e-05, 'samples': 3600, 'steps': 74, 'loss/train': 6.932758808135986}
|
866 |
+
07/24/2024 16:43:40 - INFO - __main__ - Step 76: {'lr': 5.357142857142857e-05, 'samples': 3648, 'steps': 75, 'loss/train': 7.208134651184082}
|
867 |
+
07/24/2024 16:43:40 - INFO - __main__ - Step 77: {'lr': 5.428571428571429e-05, 'samples': 3696, 'steps': 76, 'loss/train': 7.33575439453125}
|
868 |
+
07/24/2024 16:43:41 - INFO - __main__ - Step 78: {'lr': 5.5e-05, 'samples': 3744, 'steps': 77, 'loss/train': 6.378943920135498}
|
869 |
+
07/24/2024 16:43:41 - INFO - __main__ - Step 79: {'lr': 5.5714285714285715e-05, 'samples': 3792, 'steps': 78, 'loss/train': 7.227607250213623}
|
870 |
+
07/24/2024 16:43:41 - INFO - __main__ - Step 80: {'lr': 5.642857142857143e-05, 'samples': 3840, 'steps': 79, 'loss/train': 6.442720890045166}
|
871 |
+
07/24/2024 16:43:41 - INFO - __main__ - Step 81: {'lr': 5.714285714285714e-05, 'samples': 3888, 'steps': 80, 'loss/train': 7.17632532119751}
|
872 |
+
07/24/2024 16:43:42 - INFO - __main__ - Step 82: {'lr': 5.7857142857142855e-05, 'samples': 3936, 'steps': 81, 'loss/train': 6.7079668045043945}
|
873 |
+
07/24/2024 16:43:42 - INFO - __main__ - Step 83: {'lr': 5.8571428571428575e-05, 'samples': 3984, 'steps': 82, 'loss/train': 6.951054096221924}
|
874 |
+
07/24/2024 16:43:42 - INFO - __main__ - Step 84: {'lr': 5.928571428571429e-05, 'samples': 4032, 'steps': 83, 'loss/train': 7.006659507751465}
|
875 |
+
07/24/2024 16:43:43 - INFO - __main__ - Step 85: {'lr': 6e-05, 'samples': 4080, 'steps': 84, 'loss/train': 6.719784736633301}
|
876 |
+
07/24/2024 16:43:43 - INFO - __main__ - Step 86: {'lr': 6.0714285714285715e-05, 'samples': 4128, 'steps': 85, 'loss/train': 6.775872230529785}
|
877 |
+
07/24/2024 16:43:43 - INFO - __main__ - Step 87: {'lr': 6.142857142857143e-05, 'samples': 4176, 'steps': 86, 'loss/train': 6.850857257843018}
|
878 |
+
07/24/2024 16:43:43 - INFO - __main__ - Step 88: {'lr': 6.214285714285714e-05, 'samples': 4224, 'steps': 87, 'loss/train': 5.614303112030029}
|
879 |
+
07/24/2024 16:43:44 - INFO - __main__ - Step 89: {'lr': 6.285714285714286e-05, 'samples': 4272, 'steps': 88, 'loss/train': 6.760764122009277}
|
880 |
+
07/24/2024 16:43:44 - INFO - __main__ - Step 90: {'lr': 6.357142857142857e-05, 'samples': 4320, 'steps': 89, 'loss/train': 6.488976955413818}
|
881 |
+
07/24/2024 16:43:44 - INFO - __main__ - Step 91: {'lr': 6.428571428571427e-05, 'samples': 4368, 'steps': 90, 'loss/train': 6.214510917663574}
|
882 |
+
07/24/2024 16:43:45 - INFO - __main__ - Step 92: {'lr': 6.500000000000001e-05, 'samples': 4416, 'steps': 91, 'loss/train': 7.034832954406738}
|
883 |
+
07/24/2024 16:43:45 - INFO - __main__ - Step 93: {'lr': 6.571428571428571e-05, 'samples': 4464, 'steps': 92, 'loss/train': 6.2593488693237305}
|
884 |
+
07/24/2024 16:43:45 - INFO - __main__ - Step 94: {'lr': 6.642857142857143e-05, 'samples': 4512, 'steps': 93, 'loss/train': 7.205167293548584}
|
885 |
+
07/24/2024 16:43:45 - INFO - __main__ - Step 95: {'lr': 6.714285714285714e-05, 'samples': 4560, 'steps': 94, 'loss/train': 6.675778865814209}
|
886 |
+
07/24/2024 16:43:46 - INFO - __main__ - Step 96: {'lr': 6.785714285714285e-05, 'samples': 4608, 'steps': 95, 'loss/train': 4.166206359863281}
|
887 |
+
07/24/2024 16:43:46 - INFO - __main__ - Step 97: {'lr': 6.857142857142858e-05, 'samples': 4656, 'steps': 96, 'loss/train': 6.848745346069336}
|
888 |
+
07/24/2024 16:43:46 - INFO - __main__ - Step 98: {'lr': 6.928571428571429e-05, 'samples': 4704, 'steps': 97, 'loss/train': 6.357327461242676}
|
889 |
+
07/24/2024 16:43:47 - INFO - __main__ - Step 99: {'lr': 7.000000000000001e-05, 'samples': 4752, 'steps': 98, 'loss/train': 6.601438999176025}
|
890 |
+
07/24/2024 16:43:47 - INFO - __main__ - Step 100: {'lr': 7.071428571428571e-05, 'samples': 4800, 'steps': 99, 'loss/train': 6.914941310882568}
|
891 |
+
07/24/2024 16:43:47 - INFO - __main__ - Evaluating and saving model checkpoint
|
892 |
+
07/24/2024 16:43:47 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards.
|
893 |
+
07/24/2024 16:43:51 - INFO - __main__ - Step 100: {'loss/eval': 6.708734035491943, 'perplexity': 819.532470703125}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 444048000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1968dacc4f76327a8cc7946a104284cd8bdef12e64f04cc607036d62f2d35f90
|
3 |
size 444048000
|
runs/Jul24_16-42-21_lab/events.out.tfevents.1721839341.lab.84177.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55461deb944c8bcb395ade314460461e1c145431aa5b62407ba220dd3ffe064b
|
3 |
+
size 17878
|