Adam committed
Commit e8da1ea • 1 Parent(s): 9d04693
feat: updated links

training.log CHANGED (+78 -78)
@@ -1,29 +1,29 @@
|
|
1 |
-
/home/
|
2 |
warnings.warn(
|
3 |
[2023-04-14 07:44:46,752] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
|
4 |
-
[2023-04-14 07:44:48,341] [INFO] [runner.py:540:main] cmd = /home/
|
5 |
-
/home/
|
6 |
warnings.warn(
|
7 |
[2023-04-14 07:45:54,441] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
|
8 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
|
9 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
|
10 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:247:main] dist_world_size=8
|
11 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
12 |
-
/home/
|
13 |
warnings.warn(
|
14 |
-
/home/
|
15 |
warnings.warn(
|
16 |
-
/home/
|
17 |
warnings.warn(
|
18 |
-
/home/
|
19 |
warnings.warn(
|
20 |
-
/home/
|
21 |
warnings.warn(
|
22 |
-
/home/
|
23 |
warnings.warn(
|
24 |
-
/home/
|
25 |
warnings.warn(
|
26 |
-
/home/
|
27 |
warnings.warn(
|
28 |
[2023-04-14 07:49:22,604] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
|
29 |
|
@@ -32,99 +32,99 @@
|
|
32 |
|
33 |
|
34 |
|
35 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
36 |
-
|
37 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
38 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
39 |
|
40 |
50%|█████ | 1/2 [00:00<00:00, 5.00it/s]
|
41 |
-
|
42 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
43 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
44 |
|
45 |
0%| | 0/2 [00:00<?, ?it/s]
|
46 |
50%|█████ | 1/2 [00:03<00:03, 3.16s/it]
|
47 |
|
48 |
50%|█████ | 1/2 [00:00<00:00, 2.25it/s]
|
49 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
50 |
-
|
51 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
52 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
53 |
|
54 |
0%| | 0/2 [00:00<?, ?it/s]
|
55 |
|
56 |
-
Found cached dataset parquet (/
|
57 |
|
58 |
0%| | 0/2 [00:00<?, ?it/s]
|
59 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
60 |
-
|
61 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
62 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
63 |
|
64 |
|
65 |
0%| | 0/2 [00:00<?, ?it/s]
|
66 |
50%|█████ | 1/2 [00:03<00:03, 3.14s/it]
|
67 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
68 |
-
|
69 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
70 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
71 |
|
72 |
0%| | 0/2 [00:00<?, ?it/s]
|
73 |
|
74 |
-
Found cached dataset parquet (/
|
75 |
|
76 |
0%| | 0/2 [00:00<?, ?it/s]
|
77 |
-
Found cached dataset parquet (/
|
78 |
|
79 |
0%| | 0/2 [00:00<?, ?it/s]
|
80 |
50%|█████ | 1/2 [00:00<00:00, 5.57it/s]
|
81 |
-
Found cached dataset parquet (/
|
82 |
|
83 |
0%| | 0/2 [00:00<?, ?it/s]
|
84 |
-
Found cached dataset parquet (/
|
85 |
|
86 |
0%| | 0/2 [00:00<?, ?it/s]
|
87 |
50%|█████ | 1/2 [00:01<00:01, 1.01s/it]
|
88 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
89 |
-
|
90 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
91 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
92 |
|
93 |
50%|█████ | 1/2 [00:00<00:00, 3.38it/s]
|
94 |
0%| | 0/2 [00:00<?, ?it/s]
|
95 |
|
96 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
97 |
-
|
98 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
99 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
100 |
|
101 |
-
|
102 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
103 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
104 |
|
105 |
-
Found cached dataset parquet (/
|
106 |
|
107 |
0%| | 0/1 [00:00<?, ?it/s]
|
108 |
|
109 |
0%| | 0/1 [00:00<?, ?it/s]
|
110 |
-
Found cached dataset parquet (/
|
111 |
|
112 |
0%| | 0/1 [00:00<?, ?it/s]
|
113 |
-
Found cached dataset parquet (/
|
114 |
|
115 |
0%| | 0/1 [00:00<?, ?it/s]
|
116 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
117 |
-
|
118 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
119 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
120 |
|
121 |
|
122 |
0%| | 0/1 [00:00<?, ?it/s]
|
123 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
124 |
-
|
125 |
0%| | 0/2 [00:00<?, ?it/s]
|
126 |
50%|█████ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
127 |
0%| | 0/2 [00:00<?, ?it/s]
|
128 |
50%|█████ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
129 |
|
130 |
-
|
131 |
0%| | 0/2 [00:00<?, ?it/s]
|
132 |
50%|█████ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
133 |
0%| | 0/2 [00:00<?, ?it/s]
|
134 |
50%|█████ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
135 |
|
136 |
|
137 |
0%| | 0/2 [00:00<?, ?it/s]
|
138 |
-
Found cached dataset parquet (/
|
139 |
|
140 |
0%| | 0/2 [00:00<?, ?it/s]
|
141 |
50%|█████ | 1/2 [00:04<00:04, 4.27s/it]
|
142 |
-
Found cached dataset parquet (/
|
143 |
|
144 |
0%| | 0/2 [00:00<?, ?it/s]
|
145 |
-
Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
146 |
-
|
147 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
|
|
|
|
148 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
149 |
|
150 |
50%|█████ | 1/2 [00:02<00:02, 2.74s/it]
|
151 |
0%| | 0/2 [00:00<?, ?it/s]
|
152 |
|
153 |
-
Found cached dataset parquet (/
|
154 |
|
155 |
0%| | 0/2 [00:00<?, ?it/s]
|
156 |
-
Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
157 |
-
|
158 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
|
|
|
|
159 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
160 |
|
161 |
|
162 |
0%| | 0/1 [00:00<?, ?it/s]
|
163 |
-
Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
164 |
-
|
165 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
|
|
|
|
166 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
167 |
|
168 |
|
169 |
0%| | 0/1 [00:00<?, ?it/s]
|
170 |
-
Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
171 |
-
|
172 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
|
|
|
|
173 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
174 |
|
175 |
|
176 |
0%| | 0/1 [00:00<?, ?it/s]
|
177 |
-
Found cached dataset webgpt_comparisons (/
|
178 |
-
Found cached dataset webgpt_comparisons (/
|
179 |
|
180 |
0%| | 0/1 [00:00<?, ?it/s]
|
181 |
0%| | 0/1 [00:00<?, ?it/s]
|
182 |
|
183 |
-
Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
184 |
-
|
185 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
186 |
-
|
187 |
0%| | 0/3 [00:00<?, ?it/s]
|
188 |
33%|████ | 1/3 [00:00<00:00, 2.07it/s]
|
189 |
67%|███████ | 2/3 [00:00<00:00, 2.16it/s]
|
190 |
67%|███████ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
|
|
191 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
192 |
0%| | 0/3 [00:00<?, ?it/s]
|
193 |
33%|████ | 1/3 [00:00<00:00, 2.07it/s]
|
194 |
67%|███████ | 2/3 [00:00<00:00, 2.16it/s]
|
195 |
67%|███████ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
196 |
|
197 |
|
198 |
-
|
199 |
0%| | 0/3 [00:00<?, ?it/s]
|
200 |
33%|████ | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
201 |
0%| | 0/3 [00:00<?, ?it/s]
|
202 |
33%|████ | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
203 |
|
204 |
67%|███████ | 2/3 [00:14<00:07, 7.06s/it]
|
205 |
0%| | 0/3 [00:00<?, ?it/s]
|
206 |
-
Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
207 |
-
|
208 |
0%| | 0/3 [00:00<?, ?it/s]
|
209 |
33%|████ | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
|
|
210 |
0%| | 0/3 [00:00<?, ?it/s]
|
211 |
33%|████ | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
212 |
|
213 |
|
214 |
-
|
215 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
216 |
-
|
217 |
33%|████ | 1/3 [00:03<00:06, 3.22s/it]
|
218 |
0%| | 0/3 [00:00<?, ?it/s]
|
219 |
67%|███████ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
220 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
221 |
33%|████ | 1/3 [00:03<00:06, 3.22s/it]
|
222 |
0%| | 0/3 [00:00<?, ?it/s]
|
223 |
67%|███████ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
224 |
|
225 |
33%|████ | 1/3 [00:02<00:05, 2.85s/it]
|
226 |
|
227 |
0%| | 0/3 [00:00<?, ?it/s]
|
228 |
67%|███████ | 2/3 [00:04<00:02, 2.25s/it]
|
229 |
|
@@ -186,18 +186,18 @@ To disable this warning, you can either:
|
|
186 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
187 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
188 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
189 |
-
Using /home/
|
190 |
-
Using /home/
|
191 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
192 |
-
Using /home/
|
193 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
194 |
-
Using /home/
|
195 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
196 |
-
Using /home/
|
197 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
198 |
-
Using /home/
|
199 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
200 |
-
Using /home/
|
201 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
202 |
To disable this warning, you can either:
|
203 |
- Avoid using `tokenizers` before the fork if possible
|
@@ -211,7 +211,7 @@ To disable this warning, you can either:
|
|
211 |
- Avoid using `tokenizers` before the fork if possible
|
212 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
213 |
Detected CUDA files, patching ldflags
|
214 |
-
Emitting ninja build file /home/
|
215 |
Building extension module fused_adam...
|
216 |
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
217 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
@@ -242,7 +242,7 @@ To disable this warning, you can either:
|
|
242 |
- Avoid using `tokenizers` before the fork if possible
|
243 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
244 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
245 |
-
Using /home/
|
246 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
247 |
To disable this warning, you can either:
|
248 |
- Avoid using `tokenizers` before the fork if possible
|
@@ -256,7 +256,7 @@ To disable this warning, you can either:
|
|
256 |
- Avoid using `tokenizers` before the fork if possible
|
257 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
258 |
Detected CUDA files, patching ldflags
|
259 |
-
Emitting ninja build file /home/
|
260 |
Building extension module fused_adam...
|
261 |
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
262 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
@@ -273,15 +273,15 @@ Time to load fused_adam op: 15.54004192352295 seconds
|
|
273 |
[2023-04-14 08:09:50,036] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
|
274 |
[2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
|
275 |
[2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] Creating fp16 optimizer with dynamic loss scale
|
276 |
-
Using /home/
|
277 |
-
Using /home/
|
278 |
-
Using /home/
|
279 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
|
280 |
-
Using /home/
|
281 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
|
282 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f281d4468b0>
|
283 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[5e-05, 5e-05], mom=[(0.9, 0.95), (0.9, 0.95)]
|
284 |
-
Using /home/
|
285 |
[2023-04-14 08:09:50,234] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
|
286 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] activation_checkpointing_config {
|
287 |
"partition_activations": false,
|
@@ -327,7 +327,7 @@ Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions ro
|
|
327 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] communication_data_type ...... None
|
328 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
|
329 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False
|
330 |
-
Using /home/
|
331 |
|
332 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
|
333 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_enabled ...... False
|
@@ -344,7 +344,7 @@ Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions ro
|
|
344 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01
|
345 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_verbose ........... False
|
346 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] elasticity_enabled ........... False
|
347 |
-
Using /home/
|
348 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] flops_profiler_config ........ {
|
349 |
"enabled": false,
|
350 |
"profile_step": 1,
|
@@ -429,7 +429,7 @@ Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions ro
|
|
429 |
"tp_gather_partition_size": 8
|
430 |
}
|
431 |
}
|
432 |
-
Using /home/
|
433 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
434 |
To disable this warning, you can either:
|
435 |
- Avoid using `tokenizers` before the fork if possible
|
@@ -442,7 +442,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
|
|
442 |
To disable this warning, you can either:
|
443 |
- Avoid using `tokenizers` before the fork if possible
|
444 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
445 |
-
Emitting ninja build file /home/
|
446 |
Building extension module utils...
|
447 |
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
448 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
|
|
1 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
2 |
warnings.warn(
|
3 |
[2023-04-14 07:44:46,752] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
|
4 |
+
[2023-04-14 07:44:48,341] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --model_name_or_path facebook/opt-350m --num_padding_at_beginning 1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --max_seq_len 512 --learning_rate 5e-5 --weight_decay 0.1 --num_train_epochs 1 --gradient_accumulation_steps 1 --lr_scheduler_type cosine --num_warmup_steps 0 --seed 1234 --zero_stage 0 --deepspeed --output_dir /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m
|
5 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
6 |
warnings.warn(
|
7 |
[2023-04-14 07:45:54,441] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
|
8 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
|
9 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
|
10 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:247:main] dist_world_size=8
|
11 |
[2023-04-14 07:45:54,643] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
12 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
13 |
warnings.warn(
|
14 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
15 |
warnings.warn(
|
16 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
17 |
warnings.warn(
|
18 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
19 |
warnings.warn(
|
20 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
21 |
warnings.warn(
|
22 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
23 |
warnings.warn(
|
24 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
25 |
warnings.warn(
|
26 |
+
/home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
|
27 |
warnings.warn(
|
28 |
[2023-04-14 07:49:22,604] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
|
29 |
|
|
|
32 |
|
33 |
|
34 |
|
|
|
|
|
35 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
36 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
37 |
+
|
38 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
39 |
|
40 |
50%|█████ | 1/2 [00:00<00:00, 5.00it/s]
|
|
|
41 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
42 |
+
|
43 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
44 |
|
45 |
0%| | 0/2 [00:00<?, ?it/s]
|
46 |
50%|█████ | 1/2 [00:03<00:03, 3.16s/it]
|
47 |
|
48 |
50%|█████ | 1/2 [00:00<00:00, 2.25it/s]
|
|
|
|
|
49 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
50 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
51 |
+
|
52 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
53 |
|
54 |
0%| | 0/2 [00:00<?, ?it/s]
|
55 |
|
56 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
57 |
|
58 |
0%| | 0/2 [00:00<?, ?it/s]
|
|
|
|
|
59 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
60 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
61 |
+
|
62 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
63 |
|
64 |
|
65 |
0%| | 0/2 [00:00<?, ?it/s]
|
66 |
50%|█████ | 1/2 [00:03<00:03, 3.14s/it]
|
|
|
|
|
67 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
68 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
69 |
+
|
70 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
71 |
|
72 |
0%| | 0/2 [00:00<?, ?it/s]
|
73 |
|
74 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
75 |
|
76 |
0%| | 0/2 [00:00<?, ?it/s]
|
77 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
78 |
|
79 |
0%| | 0/2 [00:00<?, ?it/s]
|
80 |
50%|█████ | 1/2 [00:00<00:00, 5.57it/s]
|
81 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
82 |
|
83 |
0%| | 0/2 [00:00<?, ?it/s]
|
84 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
85 |
|
86 |
0%| | 0/2 [00:00<?, ?it/s]
|
87 |
50%|█████ | 1/2 [00:01<00:01, 1.01s/it]
|
|
|
|
|
88 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
89 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
90 |
+
|
91 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
92 |
|
93 |
50%|█████ | 1/2 [00:00<00:00, 3.38it/s]
|
94 |
0%| | 0/2 [00:00<?, ?it/s]
|
95 |
|
|
|
|
|
96 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
97 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
98 |
+
|
99 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
100 |
|
|
|
101 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
102 |
+
|
103 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
104 |
|
105 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
106 |
|
107 |
0%| | 0/1 [00:00<?, ?it/s]
|
108 |
|
109 |
0%| | 0/1 [00:00<?, ?it/s]
|
110 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
111 |
|
112 |
0%| | 0/1 [00:00<?, ?it/s]
|
113 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
114 |
|
115 |
0%| | 0/1 [00:00<?, ?it/s]
|
|
|
|
|
116 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
117 |
+
Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
118 |
+
|
119 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
120 |
|
121 |
|
122 |
0%| | 0/1 [00:00<?, ?it/s]
|
|
|
|
|
123 |
0%| | 0/2 [00:00<?, ?it/s]
|
124 |
50%|█████ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
125 |
+
Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
126 |
+
|
127 |
0%| | 0/2 [00:00<?, ?it/s]
|
128 |
50%|█████ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
129 |
|
|
|
130 |
0%| | 0/2 [00:00<?, ?it/s]
|
131 |
50%|█████ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
132 |
+
|
133 |
0%| | 0/2 [00:00<?, ?it/s]
|
134 |
50%|█████ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
135 |
|
136 |
|
137 |
0%| | 0/2 [00:00<?, ?it/s]
|
138 |
+
Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
139 |
|
140 |
0%| | 0/2 [00:00<?, ?it/s]
|
141 |
50%|█████ | 1/2 [00:04<00:04, 4.27s/it]
|
142 |
+
Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
143 |
|
144 |
0%| | 0/2 [00:00<?, ?it/s]
|
|
|
|
|
145 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
146 |
+
Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
147 |
+
|
148 |
0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
149 |
|
150 |
50%|█████ | 1/2 [00:02<00:02, 2.74s/it]
|
151 |
0%| | 0/2 [00:00<?, ?it/s]
|
152 |
|
153 |
+
Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
|
154 |
|
155 |
0%| | 0/2 [00:00<?, ?it/s]
|
|
|
|
|
156 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
157 |
+
Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
158 |
+
|
159 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
160 |
|
161 |
|
162 |
0%| | 0/1 [00:00<?, ?it/s]
|
|
|
|
|
163 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
164 |
+
Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
165 |
+
|
166 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
167 |
|
168 |
|
169 |
0%| | 0/1 [00:00<?, ?it/s]
|
|
|
|
|
170 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
171 |
+
Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
172 |
+
|
173 |
0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
174 |
|
175 |
|
176 |
0%| | 0/1 [00:00<?, ?it/s]
|
177 |
+
Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
178 |
+
Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
|
179 |
|
180 |
0%| | 0/1 [00:00<?, ?it/s]
|
181 |
0%| | 0/1 [00:00<?, ?it/s]
|
182 |
|
|
|
|
|
183 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
184 |
0%| | 0/3 [00:00<?, ?it/s]
|
185 |
33%|████ | 1/3 [00:00<00:00, 2.07it/s]
|
186 |
67%|███████ | 2/3 [00:00<00:00, 2.16it/s]
|
187 |
67%|███████ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
188 |
+
Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
189 |
+
|
190 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
191 |
+
|
192 |
0%| | 0/3 [00:00<?, ?it/s]
|
193 |
33%|████ | 1/3 [00:00<00:00, 2.07it/s]
|
194 |
67%|███████ | 2/3 [00:00<00:00, 2.16it/s]
|
195 |
67%|███████ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
196 |
|
197 |
|
|
|
198 |
0%| | 0/3 [00:00<?, ?it/s]
|
199 |
33%|████ | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
200 |
+
|
201 |
0%| | 0/3 [00:00<?, ?it/s]
|
202 |
33%|████ | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
203 |
|
204 |
67%|███████ | 2/3 [00:14<00:07, 7.06s/it]
|
205 |
0%| | 0/3 [00:00<?, ?it/s]
|
|
|
|
|
206 |
0%| | 0/3 [00:00<?, ?it/s]
|
207 |
33%|████ | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
208 |
+
Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
209 |
+
|
210 |
0%| | 0/3 [00:00<?, ?it/s]
|
211 |
33%|████ | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
212 |
|
213 |
|
|
|
214 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
|
|
215 |
33%|████ | 1/3 [00:03<00:06, 3.22s/it]
|
216 |
0%| | 0/3 [00:00<?, ?it/s]
|
217 |
67%|███████ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
218 |
+
|
219 |
0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
220 |
+
|
221 |
33%|████ | 1/3 [00:03<00:06, 3.22s/it]
|
222 |
0%| | 0/3 [00:00<?, ?it/s]
|
223 |
67%|███████ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
|
224 |
|
225 |
33%|████ | 1/3 [00:02<00:05, 2.85s/it]
|
226 |
|
227 |
0%| | 0/3 [00:00<?, ?it/s]
|
228 |
67%|███████ | 2/3 [00:04<00:02, 2.25s/it]
|
229 |
|
|
|
186 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
187 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
188 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
189 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
190 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
191 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
192 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
193 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
194 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
195 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
196 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
197 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
198 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
199 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
200 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
201 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
202 |
To disable this warning, you can either:
|
203 |
- Avoid using `tokenizers` before the fork if possible
|
|
|
211 |
- Avoid using `tokenizers` before the fork if possible
|
212 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
213 |
Detected CUDA files, patching ldflags
|
214 |
+
Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
|
215 |
Building extension module fused_adam...
|
216 |
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
217 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
|
|
242 |
- Avoid using `tokenizers` before the fork if possible
|
243 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
244 |
Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
|
245 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
246 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
247 |
To disable this warning, you can either:
|
248 |
- Avoid using `tokenizers` before the fork if possible
|
|
|
256 |
- Avoid using `tokenizers` before the fork if possible
|
257 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
258 |
Detected CUDA files, patching ldflags
|
259 |
+
Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
|
260 |
Building extension module fused_adam...
|
261 |
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
262 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
|
|
273 |
[2023-04-14 08:09:50,036] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
|
274 |
[2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
|
275 |
[2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] Creating fp16 optimizer with dynamic loss scale
|
276 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
277 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
278 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
279 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
|
280 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
281 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
|
282 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f281d4468b0>
|
283 |
[2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[5e-05, 5e-05], mom=[(0.9, 0.95), (0.9, 0.95)]
|
284 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
285 |
[2023-04-14 08:09:50,234] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
|
286 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] activation_checkpointing_config {
|
287 |
"partition_activations": false,
|
|
|
327 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] communication_data_type ...... None
|
328 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
|
329 |
[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False
|
330 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_params_legacy ..... False
|
331 |
|
332 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
|
333 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_enabled ...... False
|
|
|
344 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01
|
345 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_verbose ........... False
|
346 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] elasticity_enabled ........... False
|
347 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
348 |
[2023-04-14 08:09:50,236] [INFO] [config.py:957:print] flops_profiler_config ........ {
|
349 |
"enabled": false,
|
350 |
"profile_step": 1,
|
|
|
429 |
"tp_gather_partition_size": 8
|
430 |
}
|
431 |
}
|
432 |
+
Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
|
433 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
434 |
To disable this warning, you can either:
|
435 |
- Avoid using `tokenizers` before the fork if possible
|
|
|
442 |
To disable this warning, you can either:
|
443 |
- Avoid using `tokenizers` before the fork if possible
|
444 |
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
445 |
+
Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/utils/build.ninja...
|
446 |
Building extension module utils...
|
447 |
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
|
448 |
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|