sample model
Browse files- .gitattributes +0 -0
- config.json +1 -1
- generation_config.json +0 -0
- log/debug_0.log +114 -0
- merges.txt +0 -0
- pytorch_model.bin +1 -1
- runs/Jun08_12-24-51_2aaab01b09a9/1686227091.01416/events.out.tfevents.1686227091.2aaab01b09a9.5503.1 +3 -0
- runs/Jun08_12-24-51_2aaab01b09a9/events.out.tfevents.1686227091.2aaab01b09a9.5503.0 +3 -0
- special_tokens_map.json +0 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
- train_all.py +244 -0
- train_raw.txt +0 -0
- valid_raw.txt +0 -0
- vocab.json +0 -0
- wandb/debug-internal.log +1 -0
- wandb/debug.log +1 -0
- wandb/latest-run +1 -0
- wandb/run-20230608_122450-vrqnfbac/files/config.yaml +111 -0
- wandb/run-20230608_122450-vrqnfbac/files/output.log +219 -0
- wandb/run-20230608_122450-vrqnfbac/files/requirements.txt +171 -0
- wandb/run-20230608_122450-vrqnfbac/files/wandb-metadata.json +65 -0
- wandb/run-20230608_122450-vrqnfbac/files/wandb-summary.json +1 -0
- wandb/run-20230608_122450-vrqnfbac/logs/debug-internal.log +0 -0
- wandb/run-20230608_122450-vrqnfbac/logs/debug.log +27 -0
- wandb/run-20230608_122450-vrqnfbac/run-vrqnfbac.wandb +0 -0
.gitattributes
CHANGED
File without changes
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "./",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
generation_config.json
CHANGED
File without changes
|
log/debug_0.log
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
06/08/2023 12:24:51 - INFO - __main__ - Distributed environment: NO
|
2 |
+
Num processes: 1
|
3 |
+
Process index: 0
|
4 |
+
Local process index: 0
|
5 |
+
Device: cpu
|
6 |
+
|
7 |
+
Mixed precision type: fp16
|
8 |
+
|
9 |
+
06/08/2023 12:24:51 - WARNING - huggingface_hub.repository - /workspace/custom_llm-small/./ is already a clone of https://huggingface.co/krupalkp/custom_llm-small. Make sure you pull the latest changes with `repo.git_pull()`.
|
10 |
+
06/08/2023 12:24:51 - WARNING - huggingface_hub.repository - Revision `glorious-sound-1` does not exist. Created and checked out branch `glorious-sound-1`.
|
11 |
+
06/08/2023 12:24:51 - WARNING - huggingface_hub.repository -
|
12 |
+
06/08/2023 12:24:52 - INFO - datasets.builder - Using custom data configuration default-0f955d751e26ae0d
|
13 |
+
06/08/2023 12:24:52 - INFO - datasets.info - Loading Dataset Infos from /workspace/envs/llmenv/lib/python3.8/site-packages/datasets/packaged_modules/text
|
14 |
+
06/08/2023 12:24:52 - INFO - datasets.builder - Using custom data configuration default-da36a6bce6dd6929
|
15 |
+
06/08/2023 12:24:52 - INFO - datasets.info - Loading Dataset Infos from /workspace/envs/llmenv/lib/python3.8/site-packages/datasets/packaged_modules/text
|
16 |
+
06/08/2023 12:26:05 - INFO - __main__ - Step 1: {'lr': 0.0, 'samples': 2, 'steps': 0, 'loss/train': 9.792549133300781}
|
17 |
+
06/08/2023 12:27:03 - INFO - __main__ - Step 2: {'lr': 0.0, 'samples': 4, 'steps': 0, 'loss/train': 9.825643539428711}
|
18 |
+
06/08/2023 12:27:19 - INFO - __main__ - Step 3: {'lr': 0.0, 'samples': 6, 'steps': 0, 'loss/train': 9.78059196472168}
|
19 |
+
06/08/2023 12:27:35 - INFO - __main__ - Step 4: {'lr': 0.0, 'samples': 8, 'steps': 0, 'loss/train': 9.781628608703613}
|
20 |
+
06/08/2023 12:27:51 - INFO - __main__ - Step 5: {'lr': 0.0, 'samples': 10, 'steps': 0, 'loss/train': 9.810882568359375}
|
21 |
+
06/08/2023 12:28:06 - INFO - __main__ - Step 6: {'lr': 0.0, 'samples': 12, 'steps': 0, 'loss/train': 9.808069229125977}
|
22 |
+
06/08/2023 12:28:22 - INFO - __main__ - Step 7: {'lr': 0.0, 'samples': 14, 'steps': 0, 'loss/train': 9.817597389221191}
|
23 |
+
06/08/2023 12:28:37 - INFO - __main__ - Step 8: {'lr': 0.0, 'samples': 16, 'steps': 0, 'loss/train': 9.784443855285645}
|
24 |
+
06/08/2023 12:28:53 - INFO - __main__ - Step 9: {'lr': 0.0, 'samples': 18, 'steps': 0, 'loss/train': 9.826574325561523}
|
25 |
+
06/08/2023 12:29:08 - INFO - __main__ - Step 10: {'lr': 0.0, 'samples': 20, 'steps': 0, 'loss/train': 9.826700210571289}
|
26 |
+
06/08/2023 12:29:24 - INFO - __main__ - Step 11: {'lr': 0.0, 'samples': 22, 'steps': 0, 'loss/train': 9.811628341674805}
|
27 |
+
06/08/2023 12:29:40 - INFO - __main__ - Step 12: {'lr': 0.0, 'samples': 24, 'steps': 0, 'loss/train': 9.823099136352539}
|
28 |
+
06/08/2023 12:29:56 - INFO - __main__ - Step 13: {'lr': 0.0, 'samples': 26, 'steps': 0, 'loss/train': 9.831729888916016}
|
29 |
+
06/08/2023 12:30:12 - INFO - __main__ - Step 14: {'lr': 0.0, 'samples': 28, 'steps': 0, 'loss/train': 9.839056015014648}
|
30 |
+
06/08/2023 12:30:28 - INFO - __main__ - Step 15: {'lr': 0.0, 'samples': 30, 'steps': 0, 'loss/train': 9.804789543151855}
|
31 |
+
06/08/2023 12:30:45 - INFO - __main__ - Step 16: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 9.805603981018066}
|
32 |
+
06/08/2023 12:31:02 - INFO - __main__ - Step 17: {'lr': 2.6666666666666667e-07, 'samples': 34, 'steps': 1, 'loss/train': 9.789372444152832}
|
33 |
+
06/08/2023 12:31:18 - INFO - __main__ - Step 18: {'lr': 2.6666666666666667e-07, 'samples': 36, 'steps': 1, 'loss/train': 9.841607093811035}
|
34 |
+
06/08/2023 12:31:35 - INFO - __main__ - Step 19: {'lr': 2.6666666666666667e-07, 'samples': 38, 'steps': 1, 'loss/train': 9.838142395019531}
|
35 |
+
06/08/2023 12:31:51 - INFO - __main__ - Step 20: {'lr': 2.6666666666666667e-07, 'samples': 40, 'steps': 1, 'loss/train': 9.802177429199219}
|
36 |
+
06/08/2023 12:32:07 - INFO - __main__ - Step 21: {'lr': 2.6666666666666667e-07, 'samples': 42, 'steps': 1, 'loss/train': 9.837615013122559}
|
37 |
+
06/08/2023 12:32:23 - INFO - __main__ - Step 22: {'lr': 2.6666666666666667e-07, 'samples': 44, 'steps': 1, 'loss/train': 9.80981731414795}
|
38 |
+
06/08/2023 12:32:40 - INFO - __main__ - Step 23: {'lr': 2.6666666666666667e-07, 'samples': 46, 'steps': 1, 'loss/train': 9.793614387512207}
|
39 |
+
06/08/2023 12:32:56 - INFO - __main__ - Step 24: {'lr': 2.6666666666666667e-07, 'samples': 48, 'steps': 1, 'loss/train': 9.803434371948242}
|
40 |
+
06/08/2023 12:33:12 - INFO - __main__ - Step 25: {'lr': 2.6666666666666667e-07, 'samples': 50, 'steps': 1, 'loss/train': 9.80640697479248}
|
41 |
+
06/08/2023 12:33:28 - INFO - __main__ - Step 26: {'lr': 2.6666666666666667e-07, 'samples': 52, 'steps': 1, 'loss/train': 9.839242935180664}
|
42 |
+
06/08/2023 12:33:44 - INFO - __main__ - Step 27: {'lr': 2.6666666666666667e-07, 'samples': 54, 'steps': 1, 'loss/train': 9.837196350097656}
|
43 |
+
06/08/2023 12:34:00 - INFO - __main__ - Step 28: {'lr': 2.6666666666666667e-07, 'samples': 56, 'steps': 1, 'loss/train': 9.830636978149414}
|
44 |
+
06/08/2023 12:34:16 - INFO - __main__ - Step 29: {'lr': 2.6666666666666667e-07, 'samples': 58, 'steps': 1, 'loss/train': 9.835775375366211}
|
45 |
+
06/08/2023 12:34:32 - INFO - __main__ - Step 30: {'lr': 2.6666666666666667e-07, 'samples': 60, 'steps': 1, 'loss/train': 9.797348976135254}
|
46 |
+
06/08/2023 12:34:48 - INFO - __main__ - Step 31: {'lr': 2.6666666666666667e-07, 'samples': 62, 'steps': 1, 'loss/train': 9.817122459411621}
|
47 |
+
06/08/2023 12:35:04 - INFO - __main__ - Step 32: {'lr': 2.6666666666666667e-07, 'samples': 64, 'steps': 1, 'loss/train': 9.825984001159668}
|
48 |
+
06/08/2023 12:35:20 - INFO - __main__ - Step 33: {'lr': 5.333333333333333e-07, 'samples': 66, 'steps': 2, 'loss/train': 9.822331428527832}
|
49 |
+
06/08/2023 12:35:36 - INFO - __main__ - Step 34: {'lr': 5.333333333333333e-07, 'samples': 68, 'steps': 2, 'loss/train': 9.810147285461426}
|
50 |
+
06/08/2023 12:35:53 - INFO - __main__ - Step 35: {'lr': 5.333333333333333e-07, 'samples': 70, 'steps': 2, 'loss/train': 9.826034545898438}
|
51 |
+
06/08/2023 12:36:09 - INFO - __main__ - Step 36: {'lr': 5.333333333333333e-07, 'samples': 72, 'steps': 2, 'loss/train': 9.794151306152344}
|
52 |
+
06/08/2023 12:36:25 - INFO - __main__ - Step 37: {'lr': 5.333333333333333e-07, 'samples': 74, 'steps': 2, 'loss/train': 9.828431129455566}
|
53 |
+
06/08/2023 12:36:41 - INFO - __main__ - Step 38: {'lr': 5.333333333333333e-07, 'samples': 76, 'steps': 2, 'loss/train': 9.776195526123047}
|
54 |
+
06/08/2023 12:36:57 - INFO - __main__ - Step 39: {'lr': 5.333333333333333e-07, 'samples': 78, 'steps': 2, 'loss/train': 9.791631698608398}
|
55 |
+
06/08/2023 12:37:13 - INFO - __main__ - Step 40: {'lr': 5.333333333333333e-07, 'samples': 80, 'steps': 2, 'loss/train': 9.781876564025879}
|
56 |
+
06/08/2023 12:37:29 - INFO - __main__ - Step 41: {'lr': 5.333333333333333e-07, 'samples': 82, 'steps': 2, 'loss/train': 9.809560775756836}
|
57 |
+
06/08/2023 12:37:45 - INFO - __main__ - Step 42: {'lr': 5.333333333333333e-07, 'samples': 84, 'steps': 2, 'loss/train': 9.816283226013184}
|
58 |
+
06/08/2023 12:38:01 - INFO - __main__ - Step 43: {'lr': 5.333333333333333e-07, 'samples': 86, 'steps': 2, 'loss/train': 9.819095611572266}
|
59 |
+
06/08/2023 12:38:17 - INFO - __main__ - Step 44: {'lr': 5.333333333333333e-07, 'samples': 88, 'steps': 2, 'loss/train': 9.795587539672852}
|
60 |
+
06/08/2023 12:38:34 - INFO - __main__ - Step 45: {'lr': 5.333333333333333e-07, 'samples': 90, 'steps': 2, 'loss/train': 9.788451194763184}
|
61 |
+
06/08/2023 12:38:50 - INFO - __main__ - Step 46: {'lr': 5.333333333333333e-07, 'samples': 92, 'steps': 2, 'loss/train': 9.802919387817383}
|
62 |
+
06/08/2023 12:39:06 - INFO - __main__ - Step 47: {'lr': 5.333333333333333e-07, 'samples': 94, 'steps': 2, 'loss/train': 9.7972993850708}
|
63 |
+
06/08/2023 12:39:22 - INFO - __main__ - Step 48: {'lr': 5.333333333333333e-07, 'samples': 96, 'steps': 2, 'loss/train': 9.824687957763672}
|
64 |
+
06/08/2023 12:39:38 - INFO - __main__ - Step 49: {'lr': 8.000000000000001e-07, 'samples': 98, 'steps': 3, 'loss/train': 9.786107063293457}
|
65 |
+
06/08/2023 12:39:54 - INFO - __main__ - Step 50: {'lr': 8.000000000000001e-07, 'samples': 100, 'steps': 3, 'loss/train': 9.771675109863281}
|
66 |
+
06/08/2023 12:40:11 - INFO - __main__ - Step 51: {'lr': 8.000000000000001e-07, 'samples': 102, 'steps': 3, 'loss/train': 9.784013748168945}
|
67 |
+
06/08/2023 12:40:27 - INFO - __main__ - Step 52: {'lr': 8.000000000000001e-07, 'samples': 104, 'steps': 3, 'loss/train': 9.798379898071289}
|
68 |
+
06/08/2023 12:40:43 - INFO - __main__ - Step 53: {'lr': 8.000000000000001e-07, 'samples': 106, 'steps': 3, 'loss/train': 9.767139434814453}
|
69 |
+
06/08/2023 12:40:59 - INFO - __main__ - Step 54: {'lr': 8.000000000000001e-07, 'samples': 108, 'steps': 3, 'loss/train': 9.783173561096191}
|
70 |
+
06/08/2023 12:41:16 - INFO - __main__ - Step 55: {'lr': 8.000000000000001e-07, 'samples': 110, 'steps': 3, 'loss/train': 9.81434154510498}
|
71 |
+
06/08/2023 12:41:33 - INFO - __main__ - Step 56: {'lr': 8.000000000000001e-07, 'samples': 112, 'steps': 3, 'loss/train': 9.798585891723633}
|
72 |
+
06/08/2023 12:41:49 - INFO - __main__ - Step 57: {'lr': 8.000000000000001e-07, 'samples': 114, 'steps': 3, 'loss/train': 9.779496192932129}
|
73 |
+
06/08/2023 12:42:06 - INFO - __main__ - Step 58: {'lr': 8.000000000000001e-07, 'samples': 116, 'steps': 3, 'loss/train': 9.75149154663086}
|
74 |
+
06/08/2023 12:42:22 - INFO - __main__ - Step 59: {'lr': 8.000000000000001e-07, 'samples': 118, 'steps': 3, 'loss/train': 9.797645568847656}
|
75 |
+
06/08/2023 12:42:38 - INFO - __main__ - Step 60: {'lr': 8.000000000000001e-07, 'samples': 120, 'steps': 3, 'loss/train': 9.783336639404297}
|
76 |
+
06/08/2023 12:42:54 - INFO - __main__ - Step 61: {'lr': 8.000000000000001e-07, 'samples': 122, 'steps': 3, 'loss/train': 9.805188179016113}
|
77 |
+
06/08/2023 12:43:10 - INFO - __main__ - Step 62: {'lr': 8.000000000000001e-07, 'samples': 124, 'steps': 3, 'loss/train': 9.794000625610352}
|
78 |
+
06/08/2023 12:43:26 - INFO - __main__ - Step 63: {'lr': 8.000000000000001e-07, 'samples': 126, 'steps': 3, 'loss/train': 9.763993263244629}
|
79 |
+
06/08/2023 12:43:42 - INFO - __main__ - Step 64: {'lr': 8.000000000000001e-07, 'samples': 128, 'steps': 3, 'loss/train': 9.760546684265137}
|
80 |
+
06/08/2023 12:43:58 - INFO - __main__ - Step 65: {'lr': 1.0666666666666667e-06, 'samples': 130, 'steps': 4, 'loss/train': 9.741477966308594}
|
81 |
+
06/08/2023 12:44:14 - INFO - __main__ - Step 66: {'lr': 1.0666666666666667e-06, 'samples': 132, 'steps': 4, 'loss/train': 9.758099555969238}
|
82 |
+
06/08/2023 12:44:30 - INFO - __main__ - Step 67: {'lr': 1.0666666666666667e-06, 'samples': 134, 'steps': 4, 'loss/train': 9.758442878723145}
|
83 |
+
06/08/2023 12:44:46 - INFO - __main__ - Step 68: {'lr': 1.0666666666666667e-06, 'samples': 136, 'steps': 4, 'loss/train': 9.744771003723145}
|
84 |
+
06/08/2023 12:45:03 - INFO - __main__ - Step 69: {'lr': 1.0666666666666667e-06, 'samples': 138, 'steps': 4, 'loss/train': 9.757477760314941}
|
85 |
+
06/08/2023 12:45:19 - INFO - __main__ - Step 70: {'lr': 1.0666666666666667e-06, 'samples': 140, 'steps': 4, 'loss/train': 9.75220775604248}
|
86 |
+
06/08/2023 12:45:35 - INFO - __main__ - Step 71: {'lr': 1.0666666666666667e-06, 'samples': 142, 'steps': 4, 'loss/train': 9.75396728515625}
|
87 |
+
06/08/2023 12:45:51 - INFO - __main__ - Step 72: {'lr': 1.0666666666666667e-06, 'samples': 144, 'steps': 4, 'loss/train': 9.736096382141113}
|
88 |
+
06/08/2023 12:46:08 - INFO - __main__ - Step 73: {'lr': 1.0666666666666667e-06, 'samples': 146, 'steps': 4, 'loss/train': 9.764381408691406}
|
89 |
+
06/08/2023 12:46:24 - INFO - __main__ - Step 74: {'lr': 1.0666666666666667e-06, 'samples': 148, 'steps': 4, 'loss/train': 9.774300575256348}
|
90 |
+
06/08/2023 12:46:40 - INFO - __main__ - Step 75: {'lr': 1.0666666666666667e-06, 'samples': 150, 'steps': 4, 'loss/train': 9.743051528930664}
|
91 |
+
06/08/2023 12:46:56 - INFO - __main__ - Step 76: {'lr': 1.0666666666666667e-06, 'samples': 152, 'steps': 4, 'loss/train': 9.746865272521973}
|
92 |
+
06/08/2023 12:47:12 - INFO - __main__ - Step 77: {'lr': 1.0666666666666667e-06, 'samples': 154, 'steps': 4, 'loss/train': 9.73295783996582}
|
93 |
+
06/08/2023 12:47:28 - INFO - __main__ - Step 78: {'lr': 1.0666666666666667e-06, 'samples': 156, 'steps': 4, 'loss/train': 9.772175788879395}
|
94 |
+
06/08/2023 12:47:44 - INFO - __main__ - Step 79: {'lr': 1.0666666666666667e-06, 'samples': 158, 'steps': 4, 'loss/train': 9.710450172424316}
|
95 |
+
06/08/2023 12:48:00 - INFO - __main__ - Step 80: {'lr': 1.0666666666666667e-06, 'samples': 160, 'steps': 4, 'loss/train': 9.737425804138184}
|
96 |
+
06/08/2023 12:48:16 - INFO - __main__ - Step 81: {'lr': 1.3333333333333334e-06, 'samples': 162, 'steps': 5, 'loss/train': 9.721009254455566}
|
97 |
+
06/08/2023 12:48:32 - INFO - __main__ - Step 82: {'lr': 1.3333333333333334e-06, 'samples': 164, 'steps': 5, 'loss/train': 9.658642768859863}
|
98 |
+
06/08/2023 12:48:49 - INFO - __main__ - Step 83: {'lr': 1.3333333333333334e-06, 'samples': 166, 'steps': 5, 'loss/train': 9.73045825958252}
|
99 |
+
06/08/2023 12:49:05 - INFO - __main__ - Step 84: {'lr': 1.3333333333333334e-06, 'samples': 168, 'steps': 5, 'loss/train': 9.729884147644043}
|
100 |
+
06/08/2023 12:49:21 - INFO - __main__ - Step 85: {'lr': 1.3333333333333334e-06, 'samples': 170, 'steps': 5, 'loss/train': 9.716988563537598}
|
101 |
+
06/08/2023 12:49:37 - INFO - __main__ - Step 86: {'lr': 1.3333333333333334e-06, 'samples': 172, 'steps': 5, 'loss/train': 9.710418701171875}
|
102 |
+
06/08/2023 12:49:53 - INFO - __main__ - Step 87: {'lr': 1.3333333333333334e-06, 'samples': 174, 'steps': 5, 'loss/train': 9.705856323242188}
|
103 |
+
06/08/2023 12:50:09 - INFO - __main__ - Step 88: {'lr': 1.3333333333333334e-06, 'samples': 176, 'steps': 5, 'loss/train': 9.682978630065918}
|
104 |
+
06/08/2023 12:50:26 - INFO - __main__ - Step 89: {'lr': 1.3333333333333334e-06, 'samples': 178, 'steps': 5, 'loss/train': 9.713265419006348}
|
105 |
+
06/08/2023 12:50:42 - INFO - __main__ - Step 90: {'lr': 1.3333333333333334e-06, 'samples': 180, 'steps': 5, 'loss/train': 9.70463752746582}
|
106 |
+
06/08/2023 12:50:58 - INFO - __main__ - Step 91: {'lr': 1.3333333333333334e-06, 'samples': 182, 'steps': 5, 'loss/train': 9.685354232788086}
|
107 |
+
06/08/2023 12:51:14 - INFO - __main__ - Step 92: {'lr': 1.3333333333333334e-06, 'samples': 184, 'steps': 5, 'loss/train': 9.699443817138672}
|
108 |
+
06/08/2023 12:51:30 - INFO - __main__ - Step 93: {'lr': 1.3333333333333334e-06, 'samples': 186, 'steps': 5, 'loss/train': 9.695199966430664}
|
109 |
+
06/08/2023 12:51:46 - INFO - __main__ - Step 94: {'lr': 1.3333333333333334e-06, 'samples': 188, 'steps': 5, 'loss/train': 9.740874290466309}
|
110 |
+
06/08/2023 12:52:02 - INFO - __main__ - Step 95: {'lr': 1.3333333333333334e-06, 'samples': 190, 'steps': 5, 'loss/train': 9.701812744140625}
|
111 |
+
06/08/2023 12:52:19 - INFO - __main__ - Step 96: {'lr': 1.3333333333333334e-06, 'samples': 192, 'steps': 5, 'loss/train': 9.722161293029785}
|
112 |
+
06/08/2023 12:53:26 - INFO - __main__ - Step 97: {'lr': 1.6000000000000001e-06, 'samples': 194, 'steps': 6, 'loss/train': 9.66638469696045}
|
113 |
+
06/08/2023 12:54:12 - INFO - __main__ - Evaluating and saving model after training
|
114 |
+
06/08/2023 12:56:32 - INFO - __main__ - Step 97: {'loss/eval': 9.62712574005127, 'perplexity': 15170.7685546875}
|
merges.txt
CHANGED
File without changes
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 405495997
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:016d8da6f8aad51b24f775a6b290474124a2a6d8ddfd56ae3f0c8d0c98e4a726
|
3 |
size 405495997
|
runs/Jun08_12-24-51_2aaab01b09a9/1686227091.01416/events.out.tfevents.1686227091.2aaab01b09a9.5503.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e12238525f8c9f54ab7abdbe234098306ccde9150465a346db6039c51723e5b
|
3 |
+
size 1673
|
runs/Jun08_12-24-51_2aaab01b09a9/events.out.tfevents.1686227091.2aaab01b09a9.5503.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7c5e31ed1f463661f65775d018eaeef0adc20e9499823c2a086f7b8409d519b
|
3 |
+
size 17255
|
special_tokens_map.json
CHANGED
File without changes
|
tokenizer.json
CHANGED
File without changes
|
tokenizer_config.json
CHANGED
File without changes
|
train_all.py
ADDED
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import GPT2LMHeadModel, AutoTokenizer
|
2 |
+
from transformers import AdamW, get_scheduler, set_seed
|
3 |
+
from datasets import load_dataset
|
4 |
+
from accelerate import Accelerator
|
5 |
+
import datasets, transformers
|
6 |
+
from huggingface_hub import Repository
|
7 |
+
|
8 |
+
from torch.utils.data import IterableDataset
|
9 |
+
from torch.utils.data.dataloader import DataLoader
|
10 |
+
from torch.utils.tensorboard import SummaryWriter
|
11 |
+
from argparse import Namespace
|
12 |
+
import torch
|
13 |
+
import logging
|
14 |
+
import wandb
|
15 |
+
|
16 |
+
|
17 |
+
class ConstantLengthDataset(IterableDataset):
    """Iterable dataset that packs streamed text into fixed-length token tensors.

    Text records are read from ``dataset[da_type]`` until about
    ``seq_length * chars_per_token * num_of_sequences`` characters have been
    buffered. The buffer is tokenized in one call, the resulting id lists are
    concatenated with ``tokenizer.bos_token_id`` appended after each example,
    and the stream of ids is cut into chunks of exactly ``seq_length`` ids.
    A trailing chunk shorter than ``seq_length`` is dropped.

    Args:
        tokenizer: Callable invoked as ``tokenizer(list_of_str, truncation=False)``
            whose result is indexed with ``"input_ids"``; must also expose
            ``bos_token_id``.
        dataset: Object indexable by split name, yielding records that are
            indexed with ``"text"``.
        da_type: Name of the split to read (e.g. ``"train"`` or ``"valid"``).
        seq_length: Number of token ids in every yielded tensor.
        num_of_sequences: Together with ``chars_per_token`` sizes the character
            buffer filled before each tokenizer call.
        chars_per_token: Estimated characters per token used to size the buffer.
    """

    def __init__(
        self,
        tokenizer,
        dataset,
        da_type,
        seq_length=1024,
        num_of_sequences=1024,
        chars_per_token=5.2,
    ):
        self.tokenizer = tokenizer
        self.concat_token_id = tokenizer.bos_token_id
        self.dataset = dataset
        self.da_type = da_type
        self.seq_length = seq_length
        # Character budget to buffer before each tokenization pass.
        self.input_characters = seq_length * chars_per_token * num_of_sequences

    def __iter__(self):
        """Yield 1-D tensors of ``seq_length`` token ids until the split is exhausted."""
        iterator = iter(self.dataset[f"{self.da_type}"])
        more_examples = True
        while more_examples:
            buffer, buffer_len = [], 0
            # Fill the text buffer up to the character budget or end of data.
            while buffer_len < self.input_characters:
                try:
                    buffer.append(next(iterator)["text"])
                    buffer_len += len(buffer[-1])
                except StopIteration:
                    more_examples = False
                    break
            if not buffer:
                # Nothing left to tokenize; avoid calling the tokenizer on an
                # empty batch.
                break
            # Use the tokenizer handed to this instance rather than a
            # module-level global of the same name.
            tokenized_inputs = self.tokenizer(buffer, truncation=False)["input_ids"]
            all_token_ids = []
            for tokenized_input in tokenized_inputs:
                all_token_ids.extend(tokenized_input + [self.concat_token_id])
            for i in range(0, len(all_token_ids), self.seq_length):
                input_ids = all_token_ids[i : i + self.seq_length]
                # Only full-length chunks are emitted; the remainder is discarded.
                if len(input_ids) == self.seq_length:
                    yield torch.tensor(input_ids)
|
56 |
+
|
57 |
+
|
58 |
+
def setup_logging(project_name):
    """Configure file/console logging and, on the main process, experiment trackers.

    Every process logs to ``log/debug_<process_index>.log`` and to the console.
    The main process additionally starts a wandb run for ``project_name`` (with
    the module-level ``args`` as config), creates a TensorBoard ``SummaryWriter``
    and raises the ``datasets``/``transformers`` verbosity to info. Other
    processes get no writer, an empty run name, and error-level verbosity.

    Args:
        project_name: Project name passed to ``wandb.init``.

    Returns:
        Tuple ``(logger, tb_writer, run_name)``; ``tb_writer`` is ``None`` and
        ``run_name`` is ``""`` on non-main processes.
    """
    import os  # local import so this block does not depend on file-level imports

    # FileHandler opens the file immediately and fails if the directory is missing.
    os.makedirs("log", exist_ok=True)

    logger = logging.getLogger(__name__)
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
        handlers=[
            logging.FileHandler(f"log/debug_{accelerator.process_index}.log"),
            logging.StreamHandler(),
        ],
    )
    if accelerator.is_main_process:  # we only want to setup logging once
        wandb.init(project=project_name, config=args)
        run_name = wandb.run.name
        tb_writer = SummaryWriter()
        # Record the hyperparameters; the metric dict is a placeholder.
        tb_writer.add_hparams(vars(args), {"0": 0})
        logger.setLevel(logging.INFO)
        datasets.utils.logging.set_verbosity_info()
        transformers.utils.logging.set_verbosity_info()
    else:
        tb_writer = None
        run_name = ""
        logger.setLevel(logging.ERROR)
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()
    return logger, tb_writer, run_name
|
84 |
+
|
85 |
+
|
86 |
+
def create_dataloaders(args):
    """Build the training and evaluation dataloaders from the local raw text files.

    Both ``train_raw.txt`` and ``valid_raw.txt`` are loaded in streaming mode
    with the ``"text"`` loader. Only the training stream is shuffled, using
    ``args.shuffle_buffer`` and ``args.seed``. Each stream is wrapped in a
    ``ConstantLengthDataset`` built with the module-level ``tokenizer`` and
    ``args.seq_length``.

    Args:
        args: Namespace providing ``shuffle_buffer``, ``seed``, ``seq_length``,
            ``train_batch_size`` and ``valid_batch_size``.

    Returns:
        Tuple ``(train_dataloader, eval_dataloader)``.
    """
    streaming_opts = {"streaming": True}

    # Training split: streamed and shuffled through a bounded buffer.
    train_stream = load_dataset(
        "text", data_files={"train": ["train_raw.txt"]}, **streaming_opts
    ).shuffle(buffer_size=args.shuffle_buffer, seed=args.seed)

    # Validation split: streamed in file order.
    valid_stream = load_dataset(
        "text", data_files={"valid": ["valid_raw.txt"]}, **streaming_opts
    )

    packed_train = ConstantLengthDataset(
        tokenizer, train_stream, da_type="train", seq_length=args.seq_length
    )
    packed_valid = ConstantLengthDataset(
        tokenizer, valid_stream, da_type="valid", seq_length=args.seq_length
    )

    return (
        DataLoader(packed_train, batch_size=args.train_batch_size),
        DataLoader(packed_valid, batch_size=args.valid_batch_size),
    )
|
104 |
+
|
105 |
+
|
106 |
+
def get_grouped_params(model, args, no_decay=("bias", "LayerNorm.weight")):
    """Split a model's parameters into weight-decay and no-weight-decay groups.

    A parameter goes into the no-decay group when any entry of ``no_decay``
    occurs as a substring of its name from ``model.named_parameters()``.

    Args:
        model: Object exposing ``named_parameters()`` yielding ``(name, param)``.
        args: Namespace providing ``weight_decay`` for the decayed group.
        no_decay: Iterable of name substrings that exempt a parameter from
            weight decay. The default is an immutable tuple so it cannot be
            modified between calls.

    Returns:
        A two-element list of optimizer parameter-group dicts: first the
        decayed parameters with ``args.weight_decay``, then the exempt
        parameters with ``weight_decay`` 0.0.
    """
    # NOTE(review): verify that "LayerNorm.weight" actually matches the
    # layer-norm parameter names of the model being trained — TODO confirm.
    params_with_wd, params_without_wd = [], []
    for name, param in model.named_parameters():
        if any(marker in name for marker in no_decay):
            params_without_wd.append(param)
        else:
            params_with_wd.append(param)
    return [
        {"params": params_with_wd, "weight_decay": args.weight_decay},
        {"params": params_without_wd, "weight_decay": 0.0},
    ]
|
117 |
+
|
118 |
+
|
119 |
+
def log_metrics(step, metrics):
    """Report a metrics dict for ``step`` to the logger, wandb and TensorBoard.

    Every process writes the metrics to the module-level ``logger``. Only the
    main process forwards them to wandb and to the TensorBoard writer.

    Args:
        step: Step index used in the log line and as the TensorBoard step.
        metrics: Mapping of metric name to value.
    """
    logger.info(f"Step {step}: {metrics}")
    if not accelerator.is_main_process:
        return
    wandb.log(metrics)
    for tag, value in metrics.items():
        tb_writer.add_scalar(tag, value, step)
|
124 |
+
|
125 |
+
|
126 |
+
def evaluate(args):
    """Compute mean loss and perplexity of the module-level model on the eval data.

    Switches ``model`` to eval mode, iterates ``eval_dataloader`` without
    gradients, and gathers each batch loss (repeated ``args.valid_batch_size``
    times) through ``accelerator.gather``. The caller is responsible for
    switching the model back to training mode.

    Args:
        args: Namespace providing ``valid_batch_size`` and ``max_eval_steps``
            (values <= 0 mean "evaluate the whole dataloader").

    Returns:
        Tuple ``(loss, perplexity)`` of Python floats, where perplexity is
        ``exp(loss)``.
    """
    model.eval()
    losses = []
    for step, batch in enumerate(eval_dataloader):
        with torch.no_grad():
            outputs = model(batch, labels=batch)
        # Repeat the scalar batch loss once per sample before gathering.
        loss = outputs.loss.repeat(args.valid_batch_size)
        losses.append(accelerator.gather(loss))
        # NOTE(review): `step` is 0-based and checked after the batch is
        # processed, so this evaluates max_eval_steps + 1 batches — confirm
        # whether that is intended.
        if args.max_eval_steps > 0 and step >= args.max_eval_steps:
            break
    loss = torch.mean(torch.cat(losses))
    try:
        # Convert to a Python float here so both branches produce the same
        # type; previously the fallback float was followed by `.item()`,
        # which a float does not have.
        perplexity = torch.exp(loss).item()
    except OverflowError:
        perplexity = float("inf")
    return loss.item(), perplexity
|
142 |
+
|
143 |
+
|
144 |
+
# Accelerator: created first because logging and hyperparameters depend on it.
accelerator = Accelerator(dispatch_batches=True)
# Stringified snapshot of the accelerator state, merged into the run config below.
acc_state = {str(key): str(value) for key, value in vars(accelerator.state).items()}

# Hyperparameters
project_name = "krupalkp/custom_llm-small"
dataset_name = "../codeparrot"
config = dict(
    train_batch_size=2,
    valid_batch_size=2,
    weight_decay=0.1,
    shuffle_buffer=1_000,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    num_warmup_steps=750,
    gradient_accumulation_steps=16,
    max_train_steps=50_000,
    max_eval_steps=-1,
    seq_length=1024,
    seed=1,
    save_checkpoint_steps=50_000,
)
args = Namespace(**config, **acc_state)
# Number of samples consumed per dataloader step across all processes.
samples_per_step = accelerator.state.num_processes * args.train_batch_size
set_seed(args.seed)

# Logging: the wandb project is the part of `project_name` after the slash.
logger, tb_writer, run_name = setup_logging(project_name.split("/")[1])
logger.info(accelerator.state)

# Load model and tokenizer from the working directory; only the main process
# opens the Hub repository, on a revision named after the wandb run.
if accelerator.is_main_process:
    hf_repo = Repository("./", clone_from=project_name, revision=run_name)
model = GPT2LMHeadModel.from_pretrained("./")
tokenizer = AutoTokenizer.from_pretrained("./")

# Load dataset and dataloader
train_dataloader, eval_dataloader = create_dataloaders(args)

# Prepare the optimizer and learning rate scheduler
optimizer = AdamW(get_grouped_params(model, args), lr=args.learning_rate)
lr_scheduler = get_scheduler(
    name=args.lr_scheduler_type,
    optimizer=optimizer,
    num_warmup_steps=args.num_warmup_steps,
    num_training_steps=args.max_train_steps,
)
|
190 |
+
|
191 |
+
|
192 |
+
def get_lr():
    """Return the learning rate of the module-level optimizer's first parameter group."""
    first_group = optimizer.param_groups[0]
    return first_group["lr"]
|
194 |
+
|
195 |
+
|
196 |
+
# Hand model, optimizer and both dataloaders to the accelerator.
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader
)

# Train model
model.train()
completed_steps = 0  # optimizer updates performed so far
for step, batch in enumerate(train_dataloader, start=1):
    loss = model(batch, labels=batch, use_cache=False).loss

    # Log the unscaled loss of this micro-batch.
    train_metrics = {
        "lr": get_lr(),
        "samples": step * samples_per_step,
        "steps": completed_steps,
        "loss/train": loss.item(),
    }
    log_metrics(step, train_metrics)

    # Scale so gradients accumulated over several micro-batches average out.
    loss = loss / args.gradient_accumulation_steps
    accelerator.backward(loss)

    # Apply an optimizer update once every `gradient_accumulation_steps` micro-batches.
    if step % args.gradient_accumulation_steps == 0:
        accelerator.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        completed_steps += 1

    # Periodic checkpoint: evaluate, log, save locally and push from the main process.
    if step % args.save_checkpoint_steps == 0:
        logger.info("Evaluating and saving model checkpoint")
        eval_loss, perplexity = evaluate(args)
        log_metrics(step, {"loss/eval": eval_loss, "perplexity": perplexity})
        accelerator.wait_for_everyone()
        unwrapped_model = accelerator.unwrap_model(model)
        if accelerator.is_main_process:
            unwrapped_model.save_pretrained("./")
            hf_repo.push_to_hub(commit_message=f"step {step}")
        # evaluate() left the model in eval mode; resume training mode.
        model.train()

    if completed_steps >= args.max_train_steps:
        break

# Evaluate and save the last checkpoint
logger.info("Evaluating and saving model after training")
eval_loss, perplexity = evaluate(args)
log_metrics(step, {"loss/eval": eval_loss, "perplexity": perplexity})
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
if accelerator.is_main_process:
    unwrapped_model.save_pretrained("./")
    hf_repo.push_to_hub(commit_message="final model")
|
train_raw.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
valid_raw.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
vocab.json
CHANGED
File without changes
|
wandb/debug-internal.log
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
run-20230608_122450-vrqnfbac/logs/debug-internal.log
|
wandb/debug.log
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
run-20230608_122450-vrqnfbac/logs/debug.log
|
wandb/latest-run
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
run-20230608_122450-vrqnfbac
|
wandb/run-20230608_122450-vrqnfbac/files/config.yaml
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
train_batch_size:
|
4 |
+
desc: null
|
5 |
+
value: 2
|
6 |
+
valid_batch_size:
|
7 |
+
desc: null
|
8 |
+
value: 2
|
9 |
+
weight_decay:
|
10 |
+
desc: null
|
11 |
+
value: 0.1
|
12 |
+
shuffle_buffer:
|
13 |
+
desc: null
|
14 |
+
value: 1000
|
15 |
+
learning_rate:
|
16 |
+
desc: null
|
17 |
+
value: 0.0002
|
18 |
+
lr_scheduler_type:
|
19 |
+
desc: null
|
20 |
+
value: cosine
|
21 |
+
num_warmup_steps:
|
22 |
+
desc: null
|
23 |
+
value: 750
|
24 |
+
gradient_accumulation_steps:
|
25 |
+
desc: null
|
26 |
+
value: 16
|
27 |
+
max_train_steps:
|
28 |
+
desc: null
|
29 |
+
value: 50000
|
30 |
+
max_eval_steps:
|
31 |
+
desc: null
|
32 |
+
value: -1
|
33 |
+
seq_length:
|
34 |
+
desc: null
|
35 |
+
value: 1024
|
36 |
+
seed:
|
37 |
+
desc: null
|
38 |
+
value: 1
|
39 |
+
save_checkpoint_steps:
|
40 |
+
desc: null
|
41 |
+
value: 50000
|
42 |
+
_cpu:
|
43 |
+
desc: null
|
44 |
+
value: 'True'
|
45 |
+
backend:
|
46 |
+
desc: null
|
47 |
+
value: None
|
48 |
+
device:
|
49 |
+
desc: null
|
50 |
+
value: cpu
|
51 |
+
distributed_type:
|
52 |
+
desc: null
|
53 |
+
value: DistributedType.NO
|
54 |
+
num_processes:
|
55 |
+
desc: null
|
56 |
+
value: '1'
|
57 |
+
process_index:
|
58 |
+
desc: null
|
59 |
+
value: '0'
|
60 |
+
local_process_index:
|
61 |
+
desc: null
|
62 |
+
value: '0'
|
63 |
+
fork_launched:
|
64 |
+
desc: null
|
65 |
+
value: 'False'
|
66 |
+
deepspeed_plugin:
|
67 |
+
desc: null
|
68 |
+
value: None
|
69 |
+
dynamo_plugin:
|
70 |
+
desc: null
|
71 |
+
value: 'TorchDynamoPlugin(backend=<DynamoBackend.INDUCTOR: ''INDUCTOR''>, mode=''default'',
|
72 |
+
fullgraph=True, dynamic=True, options=None, disable=False)'
|
73 |
+
_mixed_precision:
|
74 |
+
desc: null
|
75 |
+
value: fp16
|
76 |
+
use_ipex:
|
77 |
+
desc: null
|
78 |
+
value: 'False'
|
79 |
+
_wandb:
|
80 |
+
desc: null
|
81 |
+
value:
|
82 |
+
python_version: 3.8.10
|
83 |
+
cli_version: 0.15.4
|
84 |
+
framework: huggingface
|
85 |
+
huggingface_version: 4.29.2
|
86 |
+
is_jupyter_run: false
|
87 |
+
is_kaggle_kernel: false
|
88 |
+
start_time: 1686227090.643913
|
89 |
+
t:
|
90 |
+
1:
|
91 |
+
- 1
|
92 |
+
- 11
|
93 |
+
- 49
|
94 |
+
- 51
|
95 |
+
- 55
|
96 |
+
- 71
|
97 |
+
2:
|
98 |
+
- 1
|
99 |
+
- 11
|
100 |
+
- 49
|
101 |
+
- 51
|
102 |
+
- 55
|
103 |
+
- 71
|
104 |
+
3:
|
105 |
+
- 16
|
106 |
+
- 23
|
107 |
+
4: 3.8.10
|
108 |
+
5: 0.15.4
|
109 |
+
6: 4.29.2
|
110 |
+
8:
|
111 |
+
- 5
|
wandb/run-20230608_122450-vrqnfbac/files/output.log
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
06/08/2023 12:24:51 - INFO - __main__ - Distributed environment: NO
|
2 |
+
Num processes: 1
|
3 |
+
Process index: 0
|
4 |
+
Local process index: 0
|
5 |
+
Device: cpu
|
6 |
+
Mixed precision type: fp16
|
7 |
+
/workspace/custom_llm-small/./ is already a clone of https://huggingface.co/krupalkp/custom_llm-small. Make sure you pull the latest changes with `repo.git_pull()`.
|
8 |
+
06/08/2023 12:24:51 - WARNING - huggingface_hub.repository - /workspace/custom_llm-small/./ is already a clone of https://huggingface.co/krupalkp/custom_llm-small. Make sure you pull the latest changes with `repo.git_pull()`.
|
9 |
+
Revision `glorious-sound-1` does not exist. Created and checked out branch `glorious-sound-1`.
|
10 |
+
06/08/2023 12:24:51 - WARNING - huggingface_hub.repository - Revision `glorious-sound-1` does not exist. Created and checked out branch `glorious-sound-1`.
|
11 |
+
06/08/2023 12:24:51 - WARNING - huggingface_hub.repository -
|
12 |
+
loading configuration file ./config.json
|
13 |
+
Model config GPT2Config {
|
14 |
+
"_name_or_path": "gpt2",
|
15 |
+
"activation_function": "gelu_new",
|
16 |
+
"architectures": [
|
17 |
+
"GPT2LMHeadModel"
|
18 |
+
],
|
19 |
+
"attn_pdrop": 0.1,
|
20 |
+
"bos_token_id": 50256,
|
21 |
+
"embd_pdrop": 0.1,
|
22 |
+
"eos_token_id": 50256,
|
23 |
+
"initializer_range": 0.02,
|
24 |
+
"layer_norm_epsilon": 1e-05,
|
25 |
+
"model_type": "gpt2",
|
26 |
+
"n_ctx": 1024,
|
27 |
+
"n_embd": 768,
|
28 |
+
"n_head": 12,
|
29 |
+
"n_inner": null,
|
30 |
+
"n_layer": 12,
|
31 |
+
"n_positions": 1024,
|
32 |
+
"reorder_and_upcast_attn": false,
|
33 |
+
"resid_pdrop": 0.1,
|
34 |
+
"scale_attn_by_inverse_layer_idx": false,
|
35 |
+
"scale_attn_weights": true,
|
36 |
+
"summary_activation": null,
|
37 |
+
"summary_first_dropout": 0.1,
|
38 |
+
"summary_proj_to_labels": true,
|
39 |
+
"summary_type": "cls_index",
|
40 |
+
"summary_use_proj": true,
|
41 |
+
"task_specific_params": {
|
42 |
+
"text-generation": {
|
43 |
+
"do_sample": true,
|
44 |
+
"max_length": 50
|
45 |
+
}
|
46 |
+
},
|
47 |
+
"torch_dtype": "float32",
|
48 |
+
"transformers_version": "4.29.2",
|
49 |
+
"use_cache": true,
|
50 |
+
"vocab_size": 16110
|
51 |
+
}
|
52 |
+
loading weights file ./pytorch_model.bin
|
53 |
+
Generate config GenerationConfig {
|
54 |
+
"_from_model_config": true,
|
55 |
+
"bos_token_id": 50256,
|
56 |
+
"eos_token_id": 50256,
|
57 |
+
"transformers_version": "4.29.2"
|
58 |
+
}
|
59 |
+
All model checkpoint weights were used when initializing GPT2LMHeadModel.
|
60 |
+
All the weights of GPT2LMHeadModel were initialized from the model checkpoint at ./.
|
61 |
+
If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.
|
62 |
+
loading configuration file ./generation_config.json
|
63 |
+
Generate config GenerationConfig {
|
64 |
+
"_from_model_config": true,
|
65 |
+
"bos_token_id": 50256,
|
66 |
+
"eos_token_id": 50256,
|
67 |
+
"transformers_version": "4.29.2"
|
68 |
+
}
|
69 |
+
loading file vocab.json
|
70 |
+
loading file merges.txt
|
71 |
+
loading file tokenizer.json
|
72 |
+
loading file added_tokens.json
|
73 |
+
loading file special_tokens_map.json
|
74 |
+
loading file tokenizer_config.json
|
75 |
+
06/08/2023 12:24:52 - INFO - datasets.builder - Using custom data configuration default-0f955d751e26ae0d
|
76 |
+
06/08/2023 12:24:52 - INFO - datasets.info - Loading Dataset Infos from /workspace/envs/llmenv/lib/python3.8/site-packages/datasets/packaged_modules/text
|
77 |
+
06/08/2023 12:24:52 - INFO - datasets.builder - Using custom data configuration default-da36a6bce6dd6929
|
78 |
+
06/08/2023 12:24:52 - INFO - datasets.info - Loading Dataset Infos from /workspace/envs/llmenv/lib/python3.8/site-packages/datasets/packaged_modules/text
|
79 |
+
/workspace/envs/llmenv/lib/python3.8/site-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
|
80 |
+
warnings.warn(
|
81 |
+
Token indices sequence length is longer than the specified maximum sequence length for this model (1033 > 1024). Running this sequence through the model will result in indexing errors
|
82 |
+
[2023-06-08 12:24:53,491] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward
|
83 |
+
[2023-06-08 12:24:55,457] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo done tracing forward (RETURN_VALUE)
|
84 |
+
[2023-06-08 12:24:55,518] torch._dynamo.output_graph: [INFO] Step 2: calling compiler function debug_wrapper
|
85 |
+
[2023-06-08 12:25:04,988] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling FORWARDS graph 0
|
86 |
+
[2023-06-08 12:25:05,068] torch._inductor.utils: [WARNING] using triton random, expect difference from eager
|
87 |
+
[2023-06-08 12:25:57,945] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling FORWARDS graph 0
|
88 |
+
[2023-06-08 12:25:57,950] torch._dynamo.output_graph: [INFO] Step 2: done compiler function debug_wrapper
|
89 |
+
06/08/2023 12:26:05 - INFO - __main__ - Step 1: {'lr': 0.0, 'samples': 2, 'steps': 0, 'loss/train': 9.792549133300781}
|
90 |
+
[2023-06-08 12:26:05,508] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling BACKWARDS graph 0
|
91 |
+
[2023-06-08 12:26:47,002] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling BACKWARDS graph 0
|
92 |
+
06/08/2023 12:27:03 - INFO - __main__ - Step 2: {'lr': 0.0, 'samples': 4, 'steps': 0, 'loss/train': 9.825643539428711}
|
93 |
+
06/08/2023 12:27:19 - INFO - __main__ - Step 3: {'lr': 0.0, 'samples': 6, 'steps': 0, 'loss/train': 9.78059196472168}
|
94 |
+
06/08/2023 12:27:35 - INFO - __main__ - Step 4: {'lr': 0.0, 'samples': 8, 'steps': 0, 'loss/train': 9.781628608703613}
|
95 |
+
06/08/2023 12:27:51 - INFO - __main__ - Step 5: {'lr': 0.0, 'samples': 10, 'steps': 0, 'loss/train': 9.810882568359375}
|
96 |
+
06/08/2023 12:28:06 - INFO - __main__ - Step 6: {'lr': 0.0, 'samples': 12, 'steps': 0, 'loss/train': 9.808069229125977}
|
97 |
+
06/08/2023 12:28:22 - INFO - __main__ - Step 7: {'lr': 0.0, 'samples': 14, 'steps': 0, 'loss/train': 9.817597389221191}
|
98 |
+
06/08/2023 12:28:37 - INFO - __main__ - Step 8: {'lr': 0.0, 'samples': 16, 'steps': 0, 'loss/train': 9.784443855285645}
|
99 |
+
06/08/2023 12:28:53 - INFO - __main__ - Step 9: {'lr': 0.0, 'samples': 18, 'steps': 0, 'loss/train': 9.826574325561523}
|
100 |
+
06/08/2023 12:29:08 - INFO - __main__ - Step 10: {'lr': 0.0, 'samples': 20, 'steps': 0, 'loss/train': 9.826700210571289}
|
101 |
+
06/08/2023 12:29:24 - INFO - __main__ - Step 11: {'lr': 0.0, 'samples': 22, 'steps': 0, 'loss/train': 9.811628341674805}
|
102 |
+
06/08/2023 12:29:40 - INFO - __main__ - Step 12: {'lr': 0.0, 'samples': 24, 'steps': 0, 'loss/train': 9.823099136352539}
|
103 |
+
06/08/2023 12:29:56 - INFO - __main__ - Step 13: {'lr': 0.0, 'samples': 26, 'steps': 0, 'loss/train': 9.831729888916016}
|
104 |
+
06/08/2023 12:30:12 - INFO - __main__ - Step 14: {'lr': 0.0, 'samples': 28, 'steps': 0, 'loss/train': 9.839056015014648}
|
105 |
+
06/08/2023 12:30:28 - INFO - __main__ - Step 15: {'lr': 0.0, 'samples': 30, 'steps': 0, 'loss/train': 9.804789543151855}
|
106 |
+
06/08/2023 12:30:45 - INFO - __main__ - Step 16: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 9.805603981018066}
|
107 |
+
06/08/2023 12:31:02 - INFO - __main__ - Step 17: {'lr': 2.6666666666666667e-07, 'samples': 34, 'steps': 1, 'loss/train': 9.789372444152832}
|
108 |
+
06/08/2023 12:31:18 - INFO - __main__ - Step 18: {'lr': 2.6666666666666667e-07, 'samples': 36, 'steps': 1, 'loss/train': 9.841607093811035}
|
109 |
+
06/08/2023 12:31:35 - INFO - __main__ - Step 19: {'lr': 2.6666666666666667e-07, 'samples': 38, 'steps': 1, 'loss/train': 9.838142395019531}
|
110 |
+
06/08/2023 12:31:51 - INFO - __main__ - Step 20: {'lr': 2.6666666666666667e-07, 'samples': 40, 'steps': 1, 'loss/train': 9.802177429199219}
|
111 |
+
06/08/2023 12:32:07 - INFO - __main__ - Step 21: {'lr': 2.6666666666666667e-07, 'samples': 42, 'steps': 1, 'loss/train': 9.837615013122559}
|
112 |
+
06/08/2023 12:32:23 - INFO - __main__ - Step 22: {'lr': 2.6666666666666667e-07, 'samples': 44, 'steps': 1, 'loss/train': 9.80981731414795}
|
113 |
+
06/08/2023 12:32:40 - INFO - __main__ - Step 23: {'lr': 2.6666666666666667e-07, 'samples': 46, 'steps': 1, 'loss/train': 9.793614387512207}
|
114 |
+
06/08/2023 12:32:56 - INFO - __main__ - Step 24: {'lr': 2.6666666666666667e-07, 'samples': 48, 'steps': 1, 'loss/train': 9.803434371948242}
|
115 |
+
06/08/2023 12:33:12 - INFO - __main__ - Step 25: {'lr': 2.6666666666666667e-07, 'samples': 50, 'steps': 1, 'loss/train': 9.80640697479248}
|
116 |
+
06/08/2023 12:33:28 - INFO - __main__ - Step 26: {'lr': 2.6666666666666667e-07, 'samples': 52, 'steps': 1, 'loss/train': 9.839242935180664}
|
117 |
+
06/08/2023 12:33:44 - INFO - __main__ - Step 27: {'lr': 2.6666666666666667e-07, 'samples': 54, 'steps': 1, 'loss/train': 9.837196350097656}
|
118 |
+
06/08/2023 12:34:00 - INFO - __main__ - Step 28: {'lr': 2.6666666666666667e-07, 'samples': 56, 'steps': 1, 'loss/train': 9.830636978149414}
|
119 |
+
06/08/2023 12:34:16 - INFO - __main__ - Step 29: {'lr': 2.6666666666666667e-07, 'samples': 58, 'steps': 1, 'loss/train': 9.835775375366211}
|
120 |
+
06/08/2023 12:34:32 - INFO - __main__ - Step 30: {'lr': 2.6666666666666667e-07, 'samples': 60, 'steps': 1, 'loss/train': 9.797348976135254}
|
121 |
+
06/08/2023 12:34:48 - INFO - __main__ - Step 31: {'lr': 2.6666666666666667e-07, 'samples': 62, 'steps': 1, 'loss/train': 9.817122459411621}
|
122 |
+
06/08/2023 12:35:04 - INFO - __main__ - Step 32: {'lr': 2.6666666666666667e-07, 'samples': 64, 'steps': 1, 'loss/train': 9.825984001159668}
|
123 |
+
06/08/2023 12:35:20 - INFO - __main__ - Step 33: {'lr': 5.333333333333333e-07, 'samples': 66, 'steps': 2, 'loss/train': 9.822331428527832}
|
124 |
+
06/08/2023 12:35:36 - INFO - __main__ - Step 34: {'lr': 5.333333333333333e-07, 'samples': 68, 'steps': 2, 'loss/train': 9.810147285461426}
|
125 |
+
06/08/2023 12:35:53 - INFO - __main__ - Step 35: {'lr': 5.333333333333333e-07, 'samples': 70, 'steps': 2, 'loss/train': 9.826034545898438}
|
126 |
+
06/08/2023 12:36:09 - INFO - __main__ - Step 36: {'lr': 5.333333333333333e-07, 'samples': 72, 'steps': 2, 'loss/train': 9.794151306152344}
|
127 |
+
06/08/2023 12:36:25 - INFO - __main__ - Step 37: {'lr': 5.333333333333333e-07, 'samples': 74, 'steps': 2, 'loss/train': 9.828431129455566}
|
128 |
+
06/08/2023 12:36:41 - INFO - __main__ - Step 38: {'lr': 5.333333333333333e-07, 'samples': 76, 'steps': 2, 'loss/train': 9.776195526123047}
|
129 |
+
06/08/2023 12:36:57 - INFO - __main__ - Step 39: {'lr': 5.333333333333333e-07, 'samples': 78, 'steps': 2, 'loss/train': 9.791631698608398}
|
130 |
+
06/08/2023 12:37:13 - INFO - __main__ - Step 40: {'lr': 5.333333333333333e-07, 'samples': 80, 'steps': 2, 'loss/train': 9.781876564025879}
|
131 |
+
06/08/2023 12:37:29 - INFO - __main__ - Step 41: {'lr': 5.333333333333333e-07, 'samples': 82, 'steps': 2, 'loss/train': 9.809560775756836}
|
132 |
+
06/08/2023 12:37:45 - INFO - __main__ - Step 42: {'lr': 5.333333333333333e-07, 'samples': 84, 'steps': 2, 'loss/train': 9.816283226013184}
|
133 |
+
06/08/2023 12:38:01 - INFO - __main__ - Step 43: {'lr': 5.333333333333333e-07, 'samples': 86, 'steps': 2, 'loss/train': 9.819095611572266}
|
134 |
+
06/08/2023 12:38:17 - INFO - __main__ - Step 44: {'lr': 5.333333333333333e-07, 'samples': 88, 'steps': 2, 'loss/train': 9.795587539672852}
|
135 |
+
06/08/2023 12:38:34 - INFO - __main__ - Step 45: {'lr': 5.333333333333333e-07, 'samples': 90, 'steps': 2, 'loss/train': 9.788451194763184}
|
136 |
+
06/08/2023 12:38:50 - INFO - __main__ - Step 46: {'lr': 5.333333333333333e-07, 'samples': 92, 'steps': 2, 'loss/train': 9.802919387817383}
|
137 |
+
06/08/2023 12:39:06 - INFO - __main__ - Step 47: {'lr': 5.333333333333333e-07, 'samples': 94, 'steps': 2, 'loss/train': 9.7972993850708}
|
138 |
+
06/08/2023 12:39:22 - INFO - __main__ - Step 48: {'lr': 5.333333333333333e-07, 'samples': 96, 'steps': 2, 'loss/train': 9.824687957763672}
|
139 |
+
06/08/2023 12:39:38 - INFO - __main__ - Step 49: {'lr': 8.000000000000001e-07, 'samples': 98, 'steps': 3, 'loss/train': 9.786107063293457}
|
140 |
+
06/08/2023 12:39:54 - INFO - __main__ - Step 50: {'lr': 8.000000000000001e-07, 'samples': 100, 'steps': 3, 'loss/train': 9.771675109863281}
|
141 |
+
06/08/2023 12:40:11 - INFO - __main__ - Step 51: {'lr': 8.000000000000001e-07, 'samples': 102, 'steps': 3, 'loss/train': 9.784013748168945}
|
142 |
+
06/08/2023 12:40:27 - INFO - __main__ - Step 52: {'lr': 8.000000000000001e-07, 'samples': 104, 'steps': 3, 'loss/train': 9.798379898071289}
|
143 |
+
06/08/2023 12:40:43 - INFO - __main__ - Step 53: {'lr': 8.000000000000001e-07, 'samples': 106, 'steps': 3, 'loss/train': 9.767139434814453}
|
144 |
+
06/08/2023 12:40:59 - INFO - __main__ - Step 54: {'lr': 8.000000000000001e-07, 'samples': 108, 'steps': 3, 'loss/train': 9.783173561096191}
|
145 |
+
06/08/2023 12:41:16 - INFO - __main__ - Step 55: {'lr': 8.000000000000001e-07, 'samples': 110, 'steps': 3, 'loss/train': 9.81434154510498}
|
146 |
+
06/08/2023 12:41:33 - INFO - __main__ - Step 56: {'lr': 8.000000000000001e-07, 'samples': 112, 'steps': 3, 'loss/train': 9.798585891723633}
|
147 |
+
06/08/2023 12:41:49 - INFO - __main__ - Step 57: {'lr': 8.000000000000001e-07, 'samples': 114, 'steps': 3, 'loss/train': 9.779496192932129}
|
148 |
+
06/08/2023 12:42:06 - INFO - __main__ - Step 58: {'lr': 8.000000000000001e-07, 'samples': 116, 'steps': 3, 'loss/train': 9.75149154663086}
|
149 |
+
06/08/2023 12:42:22 - INFO - __main__ - Step 59: {'lr': 8.000000000000001e-07, 'samples': 118, 'steps': 3, 'loss/train': 9.797645568847656}
|
150 |
+
06/08/2023 12:42:38 - INFO - __main__ - Step 60: {'lr': 8.000000000000001e-07, 'samples': 120, 'steps': 3, 'loss/train': 9.783336639404297}
|
151 |
+
06/08/2023 12:42:54 - INFO - __main__ - Step 61: {'lr': 8.000000000000001e-07, 'samples': 122, 'steps': 3, 'loss/train': 9.805188179016113}
|
152 |
+
06/08/2023 12:43:10 - INFO - __main__ - Step 62: {'lr': 8.000000000000001e-07, 'samples': 124, 'steps': 3, 'loss/train': 9.794000625610352}
|
153 |
+
06/08/2023 12:43:26 - INFO - __main__ - Step 63: {'lr': 8.000000000000001e-07, 'samples': 126, 'steps': 3, 'loss/train': 9.763993263244629}
|
154 |
+
06/08/2023 12:43:42 - INFO - __main__ - Step 64: {'lr': 8.000000000000001e-07, 'samples': 128, 'steps': 3, 'loss/train': 9.760546684265137}
|
155 |
+
06/08/2023 12:43:58 - INFO - __main__ - Step 65: {'lr': 1.0666666666666667e-06, 'samples': 130, 'steps': 4, 'loss/train': 9.741477966308594}
|
156 |
+
06/08/2023 12:44:14 - INFO - __main__ - Step 66: {'lr': 1.0666666666666667e-06, 'samples': 132, 'steps': 4, 'loss/train': 9.758099555969238}
|
157 |
+
06/08/2023 12:44:30 - INFO - __main__ - Step 67: {'lr': 1.0666666666666667e-06, 'samples': 134, 'steps': 4, 'loss/train': 9.758442878723145}
|
158 |
+
06/08/2023 12:44:46 - INFO - __main__ - Step 68: {'lr': 1.0666666666666667e-06, 'samples': 136, 'steps': 4, 'loss/train': 9.744771003723145}
|
159 |
+
06/08/2023 12:45:03 - INFO - __main__ - Step 69: {'lr': 1.0666666666666667e-06, 'samples': 138, 'steps': 4, 'loss/train': 9.757477760314941}
|
160 |
+
06/08/2023 12:45:19 - INFO - __main__ - Step 70: {'lr': 1.0666666666666667e-06, 'samples': 140, 'steps': 4, 'loss/train': 9.75220775604248}
|
161 |
+
06/08/2023 12:45:35 - INFO - __main__ - Step 71: {'lr': 1.0666666666666667e-06, 'samples': 142, 'steps': 4, 'loss/train': 9.75396728515625}
|
162 |
+
06/08/2023 12:45:51 - INFO - __main__ - Step 72: {'lr': 1.0666666666666667e-06, 'samples': 144, 'steps': 4, 'loss/train': 9.736096382141113}
|
163 |
+
06/08/2023 12:46:08 - INFO - __main__ - Step 73: {'lr': 1.0666666666666667e-06, 'samples': 146, 'steps': 4, 'loss/train': 9.764381408691406}
|
164 |
+
06/08/2023 12:46:24 - INFO - __main__ - Step 74: {'lr': 1.0666666666666667e-06, 'samples': 148, 'steps': 4, 'loss/train': 9.774300575256348}
|
165 |
+
06/08/2023 12:46:40 - INFO - __main__ - Step 75: {'lr': 1.0666666666666667e-06, 'samples': 150, 'steps': 4, 'loss/train': 9.743051528930664}
|
166 |
+
06/08/2023 12:46:56 - INFO - __main__ - Step 76: {'lr': 1.0666666666666667e-06, 'samples': 152, 'steps': 4, 'loss/train': 9.746865272521973}
|
167 |
+
06/08/2023 12:47:12 - INFO - __main__ - Step 77: {'lr': 1.0666666666666667e-06, 'samples': 154, 'steps': 4, 'loss/train': 9.73295783996582}
|
168 |
+
06/08/2023 12:47:28 - INFO - __main__ - Step 78: {'lr': 1.0666666666666667e-06, 'samples': 156, 'steps': 4, 'loss/train': 9.772175788879395}
|
169 |
+
06/08/2023 12:47:44 - INFO - __main__ - Step 79: {'lr': 1.0666666666666667e-06, 'samples': 158, 'steps': 4, 'loss/train': 9.710450172424316}
|
170 |
+
06/08/2023 12:48:00 - INFO - __main__ - Step 80: {'lr': 1.0666666666666667e-06, 'samples': 160, 'steps': 4, 'loss/train': 9.737425804138184}
|
171 |
+
06/08/2023 12:48:16 - INFO - __main__ - Step 81: {'lr': 1.3333333333333334e-06, 'samples': 162, 'steps': 5, 'loss/train': 9.721009254455566}
|
172 |
+
06/08/2023 12:48:32 - INFO - __main__ - Step 82: {'lr': 1.3333333333333334e-06, 'samples': 164, 'steps': 5, 'loss/train': 9.658642768859863}
|
173 |
+
06/08/2023 12:48:49 - INFO - __main__ - Step 83: {'lr': 1.3333333333333334e-06, 'samples': 166, 'steps': 5, 'loss/train': 9.73045825958252}
|
174 |
+
06/08/2023 12:49:05 - INFO - __main__ - Step 84: {'lr': 1.3333333333333334e-06, 'samples': 168, 'steps': 5, 'loss/train': 9.729884147644043}
|
175 |
+
06/08/2023 12:49:21 - INFO - __main__ - Step 85: {'lr': 1.3333333333333334e-06, 'samples': 170, 'steps': 5, 'loss/train': 9.716988563537598}
|
176 |
+
06/08/2023 12:49:37 - INFO - __main__ - Step 86: {'lr': 1.3333333333333334e-06, 'samples': 172, 'steps': 5, 'loss/train': 9.710418701171875}
|
177 |
+
06/08/2023 12:49:53 - INFO - __main__ - Step 87: {'lr': 1.3333333333333334e-06, 'samples': 174, 'steps': 5, 'loss/train': 9.705856323242188}
|
178 |
+
06/08/2023 12:50:09 - INFO - __main__ - Step 88: {'lr': 1.3333333333333334e-06, 'samples': 176, 'steps': 5, 'loss/train': 9.682978630065918}
|
179 |
+
06/08/2023 12:50:26 - INFO - __main__ - Step 89: {'lr': 1.3333333333333334e-06, 'samples': 178, 'steps': 5, 'loss/train': 9.713265419006348}
|
180 |
+
06/08/2023 12:50:42 - INFO - __main__ - Step 90: {'lr': 1.3333333333333334e-06, 'samples': 180, 'steps': 5, 'loss/train': 9.70463752746582}
|
181 |
+
06/08/2023 12:50:58 - INFO - __main__ - Step 91: {'lr': 1.3333333333333334e-06, 'samples': 182, 'steps': 5, 'loss/train': 9.685354232788086}
|
182 |
+
06/08/2023 12:51:14 - INFO - __main__ - Step 92: {'lr': 1.3333333333333334e-06, 'samples': 184, 'steps': 5, 'loss/train': 9.699443817138672}
|
183 |
+
06/08/2023 12:51:30 - INFO - __main__ - Step 93: {'lr': 1.3333333333333334e-06, 'samples': 186, 'steps': 5, 'loss/train': 9.695199966430664}
|
184 |
+
06/08/2023 12:51:46 - INFO - __main__ - Step 94: {'lr': 1.3333333333333334e-06, 'samples': 188, 'steps': 5, 'loss/train': 9.740874290466309}
|
185 |
+
06/08/2023 12:52:02 - INFO - __main__ - Step 95: {'lr': 1.3333333333333334e-06, 'samples': 190, 'steps': 5, 'loss/train': 9.701812744140625}
|
186 |
+
06/08/2023 12:52:19 - INFO - __main__ - Step 96: {'lr': 1.3333333333333334e-06, 'samples': 192, 'steps': 5, 'loss/train': 9.722161293029785}
|
187 |
+
[2023-06-08 12:52:29,215] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward
|
188 |
+
[2023-06-08 12:52:30,998] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo done tracing forward (RETURN_VALUE)
|
189 |
+
[2023-06-08 12:52:31,050] torch._dynamo.output_graph: [INFO] Step 2: calling compiler function debug_wrapper
|
190 |
+
[2023-06-08 12:52:38,232] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling FORWARDS graph 1
|
191 |
+
[2023-06-08 12:52:38,268] torch._inductor.utils: [WARNING] using triton random, expect difference from eager
|
192 |
+
[2023-06-08 12:53:22,928] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling FORWARDS graph 1
|
193 |
+
[2023-06-08 12:53:22,934] torch._dynamo.output_graph: [INFO] Step 2: done compiler function debug_wrapper
|
194 |
+
06/08/2023 12:53:26 - INFO - __main__ - Step 97: {'lr': 1.6000000000000001e-06, 'samples': 194, 'steps': 6, 'loss/train': 9.66638469696045}
|
195 |
+
[2023-06-08 12:53:26,397] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling BACKWARDS graph 1
|
196 |
+
[2023-06-08 12:54:07,569] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling BACKWARDS graph 1
|
197 |
+
06/08/2023 12:54:12 - INFO - __main__ - Evaluating and saving model after training
|
198 |
+
[2023-06-08 12:54:12,607] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo start tracing forward
|
199 |
+
[2023-06-08 12:54:14,365] torch._dynamo.symbolic_convert: [INFO] Step 1: torchdynamo done tracing forward (RETURN_VALUE)
|
200 |
+
[2023-06-08 12:54:14,424] torch._dynamo.output_graph: [INFO] Step 2: calling compiler function debug_wrapper
|
201 |
+
[2023-06-08 12:54:18,505] torch._inductor.compile_fx: [INFO] Step 3: torchinductor compiling FORWARDS graph 2
|
202 |
+
[2023-06-08 12:54:32,854] torch._inductor.compile_fx: [INFO] Step 3: torchinductor done compiling FORWARDS graph 2
|
203 |
+
[2023-06-08 12:54:32,859] torch._dynamo.output_graph: [INFO] Step 2: done compiler function debug_wrapper
|
204 |
+
06/08/2023 12:56:32 - INFO - __main__ - Step 97: {'loss/eval': 9.62712574005127, 'perplexity': 15170.7685546875}
|
205 |
+
Configuration saved in ./config.json
|
206 |
+
Configuration saved in ./generation_config.json
|
207 |
+
Model weights saved in ./pytorch_model.bin
|
208 |
+
Traceback (most recent call last):
|
209 |
+
File "train_all.py", line 244, in <module>
|
210 |
+
hf_repo.push_to_hub(commit_message=f"final model")
|
211 |
+
File "/workspace/envs/llmenv/lib/python3.8/site-packages/huggingface_hub/repository.py", line 1305, in push_to_hub
|
212 |
+
self.git_add(auto_lfs_track=True)
|
213 |
+
File "/workspace/envs/llmenv/lib/python3.8/site-packages/huggingface_hub/repository.py", line 1009, in git_add
|
214 |
+
tracked_files.extend(self.auto_track_binary_files(pattern))
|
215 |
+
File "/workspace/envs/llmenv/lib/python3.8/site-packages/huggingface_hub/repository.py", line 903, in auto_track_binary_files
|
216 |
+
is_binary = is_binary_file(path_to_file)
|
217 |
+
File "/workspace/envs/llmenv/lib/python3.8/site-packages/huggingface_hub/repository.py", line 230, in is_binary_file
|
218 |
+
with open(filename, "rb") as f:
|
219 |
+
IsADirectoryError: [Errno 21] Is a directory: '/workspace/custom_llm-small/./wandb/latest-run'
|
wandb/run-20230608_122450-vrqnfbac/files/requirements.txt
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==1.4.0
|
2 |
+
accelerate==0.20.1
|
3 |
+
aiohttp==3.8.4
|
4 |
+
aiosignal==1.3.1
|
5 |
+
anyio==3.7.0
|
6 |
+
appdirs==1.4.4
|
7 |
+
argon2-cffi-bindings==21.2.0
|
8 |
+
argon2-cffi==21.3.0
|
9 |
+
arrow==1.2.3
|
10 |
+
asttokens==2.2.1
|
11 |
+
async-lru==2.0.2
|
12 |
+
async-timeout==4.0.2
|
13 |
+
attrs==23.1.0
|
14 |
+
babel==2.12.1
|
15 |
+
backcall==0.2.0
|
16 |
+
beautifulsoup4==4.12.2
|
17 |
+
bleach==6.0.0
|
18 |
+
cachetools==5.3.1
|
19 |
+
certifi==2023.5.7
|
20 |
+
cffi==1.15.1
|
21 |
+
charset-normalizer==3.1.0
|
22 |
+
click==8.1.3
|
23 |
+
cmake==3.26.4
|
24 |
+
comm==0.1.3
|
25 |
+
datasets==2.12.0
|
26 |
+
debugpy==1.6.7
|
27 |
+
decorator==5.1.1
|
28 |
+
defusedxml==0.7.1
|
29 |
+
dill==0.3.6
|
30 |
+
docker-pycreds==0.4.0
|
31 |
+
exceptiongroup==1.1.1
|
32 |
+
executing==1.2.0
|
33 |
+
fastjsonschema==2.17.1
|
34 |
+
filelock==3.12.0
|
35 |
+
fqdn==1.5.1
|
36 |
+
frozenlist==1.3.3
|
37 |
+
fsspec==2023.5.0
|
38 |
+
gitdb==4.0.10
|
39 |
+
gitpython==3.1.31
|
40 |
+
google-auth-oauthlib==1.0.0
|
41 |
+
google-auth==2.19.1
|
42 |
+
grpcio==1.54.2
|
43 |
+
huggingface-hub==0.15.1
|
44 |
+
idna==3.4
|
45 |
+
importlib-metadata==6.6.0
|
46 |
+
importlib-resources==5.12.0
|
47 |
+
ipykernel==6.23.1
|
48 |
+
ipython-genutils==0.2.0
|
49 |
+
ipython==8.12.2
|
50 |
+
ipywidgets==8.0.6
|
51 |
+
isoduration==20.11.0
|
52 |
+
jedi==0.18.2
|
53 |
+
jinja2==3.1.2
|
54 |
+
json5==0.9.14
|
55 |
+
jsonpointer==2.3
|
56 |
+
jsonschema==4.17.3
|
57 |
+
jupyter-client==8.2.0
|
58 |
+
jupyter-console==6.6.3
|
59 |
+
jupyter-core==5.3.0
|
60 |
+
jupyter-events==0.6.3
|
61 |
+
jupyter-lsp==2.2.0
|
62 |
+
jupyter-server-terminals==0.4.4
|
63 |
+
jupyter-server==2.6.0
|
64 |
+
jupyter==1.0.0
|
65 |
+
jupyterlab-pygments==0.2.2
|
66 |
+
jupyterlab-server==2.22.1
|
67 |
+
jupyterlab-widgets==3.0.7
|
68 |
+
jupyterlab==4.0.1
|
69 |
+
lit==16.0.5.post0
|
70 |
+
markdown==3.4.3
|
71 |
+
markupsafe==2.1.3
|
72 |
+
matplotlib-inline==0.1.6
|
73 |
+
mistune==2.0.5
|
74 |
+
mpmath==1.3.0
|
75 |
+
multidict==6.0.4
|
76 |
+
multiprocess==0.70.14
|
77 |
+
nbclassic==1.0.0
|
78 |
+
nbclient==0.8.0
|
79 |
+
nbconvert==7.4.0
|
80 |
+
nbformat==5.9.0
|
81 |
+
nest-asyncio==1.5.6
|
82 |
+
networkx==3.1
|
83 |
+
notebook-shim==0.2.3
|
84 |
+
notebook==6.5.4
|
85 |
+
numpy==1.24.3
|
86 |
+
nvidia-cublas-cu11==11.10.3.66
|
87 |
+
nvidia-cuda-cupti-cu11==11.7.101
|
88 |
+
nvidia-cuda-nvrtc-cu11==11.7.99
|
89 |
+
nvidia-cuda-runtime-cu11==11.7.99
|
90 |
+
nvidia-cudnn-cu11==8.5.0.96
|
91 |
+
nvidia-cufft-cu11==10.9.0.58
|
92 |
+
nvidia-curand-cu11==10.2.10.91
|
93 |
+
nvidia-cusolver-cu11==11.4.0.1
|
94 |
+
nvidia-cusparse-cu11==11.7.4.91
|
95 |
+
nvidia-nccl-cu11==2.14.3
|
96 |
+
nvidia-nvtx-cu11==11.7.91
|
97 |
+
oauthlib==3.2.2
|
98 |
+
overrides==7.3.1
|
99 |
+
packaging==23.1
|
100 |
+
pandas==2.0.2
|
101 |
+
pandocfilters==1.5.0
|
102 |
+
parso==0.8.3
|
103 |
+
pathtools==0.1.2
|
104 |
+
pexpect==4.8.0
|
105 |
+
pickleshare==0.7.5
|
106 |
+
pip==23.1.2
|
107 |
+
pkgutil-resolve-name==1.3.10
|
108 |
+
platformdirs==3.5.1
|
109 |
+
prometheus-client==0.17.0
|
110 |
+
prompt-toolkit==3.0.38
|
111 |
+
protobuf==4.23.2
|
112 |
+
psutil==5.9.5
|
113 |
+
ptyprocess==0.7.0
|
114 |
+
pure-eval==0.2.2
|
115 |
+
pyarrow==12.0.0
|
116 |
+
pyasn1-modules==0.3.0
|
117 |
+
pyasn1==0.5.0
|
118 |
+
pycparser==2.21
|
119 |
+
pygments==2.15.1
|
120 |
+
pyrsistent==0.19.3
|
121 |
+
python-dateutil==2.8.2
|
122 |
+
python-json-logger==2.0.7
|
123 |
+
pytz==2023.3
|
124 |
+
pyyaml==6.0
|
125 |
+
pyzmq==25.1.0
|
126 |
+
qtconsole==5.4.3
|
127 |
+
qtpy==2.3.1
|
128 |
+
regex==2023.6.3
|
129 |
+
requests-oauthlib==1.3.1
|
130 |
+
requests==2.31.0
|
131 |
+
responses==0.18.0
|
132 |
+
rfc3339-validator==0.1.4
|
133 |
+
rfc3986-validator==0.1.1
|
134 |
+
rsa==4.9
|
135 |
+
send2trash==1.8.2
|
136 |
+
sentry-sdk==1.25.1
|
137 |
+
setproctitle==1.3.2
|
138 |
+
setuptools==67.7.2
|
139 |
+
six==1.16.0
|
140 |
+
smmap==5.0.0
|
141 |
+
sniffio==1.3.0
|
142 |
+
soupsieve==2.4.1
|
143 |
+
stack-data==0.6.2
|
144 |
+
sympy==1.12
|
145 |
+
tensorboard-data-server==0.7.0
|
146 |
+
tensorboard==2.13.0
|
147 |
+
terminado==0.17.1
|
148 |
+
tinycss2==1.2.1
|
149 |
+
tokenizers==0.13.3
|
150 |
+
tomli==2.0.1
|
151 |
+
torch==2.0.1
|
152 |
+
tornado==6.3.2
|
153 |
+
tqdm==4.65.0
|
154 |
+
traitlets==5.9.0
|
155 |
+
transformers==4.29.2
|
156 |
+
triton==2.0.0
|
157 |
+
typing-extensions==4.6.3
|
158 |
+
tzdata==2023.3
|
159 |
+
uri-template==1.2.0
|
160 |
+
urllib3==1.26.16
|
161 |
+
wandb==0.15.4
|
162 |
+
wcwidth==0.2.6
|
163 |
+
webcolors==1.13
|
164 |
+
webencodings==0.5.1
|
165 |
+
websocket-client==1.5.2
|
166 |
+
werkzeug==2.3.5
|
167 |
+
wheel==0.40.0
|
168 |
+
widgetsnbextension==4.0.7
|
169 |
+
xxhash==3.2.0
|
170 |
+
yarl==1.9.2
|
171 |
+
zipp==3.15.0
|
wandb/run-20230608_122450-vrqnfbac/files/wandb-metadata.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.15.0-1034-aws-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2023-06-08T12:24:50.973990",
|
5 |
+
"startedAt": "2023-06-08T12:24:50.637418",
|
6 |
+
"docker": null,
|
7 |
+
"cuda": null,
|
8 |
+
"args": [],
|
9 |
+
"state": "running",
|
10 |
+
"program": "train_all.py",
|
11 |
+
"codePath": "train_all.py",
|
12 |
+
"git": {
|
13 |
+
"remote": "https://huggingface.co/krupalkp/custom_llm-small",
|
14 |
+
"commit": "fa712672eb21ef9096828ca756ee5738f9743137"
|
15 |
+
},
|
16 |
+
"email": null,
|
17 |
+
"root": "/workspace/custom_llm-small",
|
18 |
+
"host": "2aaab01b09a9",
|
19 |
+
"username": "root",
|
20 |
+
"executable": "/workspace/envs/llmenv/bin/python",
|
21 |
+
"cpu_count": 2,
|
22 |
+
"cpu_count_logical": 4,
|
23 |
+
"cpu_freq": {
|
24 |
+
"current": 2799.9982499999996,
|
25 |
+
"min": 0.0,
|
26 |
+
"max": 0.0
|
27 |
+
},
|
28 |
+
"cpu_freq_per_core": [
|
29 |
+
{
|
30 |
+
"current": 3100.042,
|
31 |
+
"min": 0.0,
|
32 |
+
"max": 0.0
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"current": 2499.998,
|
36 |
+
"min": 0.0,
|
37 |
+
"max": 0.0
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"current": 3099.955,
|
41 |
+
"min": 0.0,
|
42 |
+
"max": 0.0
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"current": 2499.998,
|
46 |
+
"min": 0.0,
|
47 |
+
"max": 0.0
|
48 |
+
}
|
49 |
+
],
|
50 |
+
"disk": {
|
51 |
+
"total": 72.63036346435547,
|
52 |
+
"used": 55.228641510009766
|
53 |
+
},
|
54 |
+
"gpu": "Tesla T4",
|
55 |
+
"gpu_count": 1,
|
56 |
+
"gpu_devices": [
|
57 |
+
{
|
58 |
+
"name": "Tesla T4",
|
59 |
+
"memory_total": 16106127360
|
60 |
+
}
|
61 |
+
],
|
62 |
+
"memory": {
|
63 |
+
"total": 15.337417602539062
|
64 |
+
}
|
65 |
+
}
|
wandb/run-20230608_122450-vrqnfbac/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr": 1.6000000000000001e-06, "samples": 194, "steps": 6, "loss/train": 9.66638469696045, "_timestamp": 1686228992.2342088, "_runtime": 1901.590295791626, "_step": 97, "loss/eval": 9.62712574005127, "perplexity": 15170.7685546875, "_wandb": {"runtime": 1905}}
|
wandb/run-20230608_122450-vrqnfbac/logs/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20230608_122450-vrqnfbac/logs/debug.log
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Current SDK version is 0.15.4
|
2 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Configure stats pid to 5503
|
3 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Loading settings from /workspace/custom_llm-small/wandb/settings
|
5 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
|
7 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'train_all.py', 'program': 'train_all.py'}
|
8 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_init.py:_log_setup():507] Logging user logs to /workspace/custom_llm-small/wandb/run-20230608_122450-vrqnfbac/logs/debug.log
|
9 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_init.py:_log_setup():508] Logging internal logs to /workspace/custom_llm-small/wandb/run-20230608_122450-vrqnfbac/logs/debug-internal.log
|
10 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_init.py:init():547] calling init triggers
|
11 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_init.py:init():554] wandb.init called with sweep_config: {}
|
12 |
+
config: {'train_batch_size': 2, 'valid_batch_size': 2, 'weight_decay': 0.1, 'shuffle_buffer': 1000, 'learning_rate': 0.0002, 'lr_scheduler_type': 'cosine', 'num_warmup_steps': 750, 'gradient_accumulation_steps': 16, 'max_train_steps': 50000, 'max_eval_steps': -1, 'seq_length': 1024, 'seed': 1, 'save_checkpoint_steps': 50000, '_cpu': 'True', 'backend': 'None', 'device': 'cpu', 'distributed_type': 'DistributedType.NO', 'num_processes': '1', 'process_index': '0', 'local_process_index': '0', 'fork_launched': 'False', 'deepspeed_plugin': 'None', 'dynamo_plugin': "TorchDynamoPlugin(backend=<DynamoBackend.INDUCTOR: 'INDUCTOR'>, mode='default', fullgraph=True, dynamic=True, options=None, disable=False)", '_mixed_precision': 'fp16', 'use_ipex': 'False'}
|
13 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_init.py:init():596] starting backend
|
14 |
+
2023-06-08 12:24:50,639 INFO MainThread:5503 [wandb_init.py:init():600] setting up manager
|
15 |
+
2023-06-08 12:24:50,641 INFO MainThread:5503 [backend.py:_multiprocessing_setup():106] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
16 |
+
2023-06-08 12:24:50,643 INFO MainThread:5503 [wandb_init.py:init():606] backend started and connected
|
17 |
+
2023-06-08 12:24:50,647 INFO MainThread:5503 [wandb_init.py:init():703] updated telemetry
|
18 |
+
2023-06-08 12:24:50,662 INFO MainThread:5503 [wandb_init.py:init():736] communicating run to backend with 60.0 second timeout
|
19 |
+
2023-06-08 12:24:50,882 INFO MainThread:5503 [wandb_run.py:_on_init():2176] communicating current version
|
20 |
+
2023-06-08 12:24:50,908 INFO MainThread:5503 [wandb_run.py:_on_init():2185] got version response
|
21 |
+
2023-06-08 12:24:50,908 INFO MainThread:5503 [wandb_init.py:init():787] starting run threads in backend
|
22 |
+
2023-06-08 12:24:51,004 INFO MainThread:5503 [wandb_run.py:_console_start():2155] atexit reg
|
23 |
+
2023-06-08 12:24:51,004 INFO MainThread:5503 [wandb_run.py:_redirect():2010] redirect: SettingsConsole.WRAP_RAW
|
24 |
+
2023-06-08 12:24:51,005 INFO MainThread:5503 [wandb_run.py:_redirect():2075] Wrapping output streams.
|
25 |
+
2023-06-08 12:24:51,005 INFO MainThread:5503 [wandb_run.py:_redirect():2100] Redirects installed.
|
26 |
+
2023-06-08 12:24:51,006 INFO MainThread:5503 [wandb_init.py:init():828] run started, returning control to user process
|
27 |
+
2023-06-08 12:56:41,472 WARNING MsgRouterThr:5503 [router.py:message_loop():77] message_loop has been closed
|
wandb/run-20230608_122450-vrqnfbac/run-vrqnfbac.wandb
ADDED
Binary file (90.3 kB). View file
|
|