picocreator commited on
Commit
560af7f
1 Parent(s): 8482e86

[GHA] experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb result notebook & reports (#199)

Browse files

- 293f0b77a3d614e98964fbf0e74513433c1a1643493a79225b1dce7ad11c0a2b (c0f4011acfe68bfa8b630f115d93ecdb62addd2c)

experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb CHANGED
@@ -3,13 +3,13 @@
3
  {
4
  "attachments": {},
5
  "cell_type": "markdown",
6
- "id": "c4287c8b",
7
  "metadata": {
8
  "papermill": {
9
- "duration": 0.004479,
10
- "end_time": "2023-10-11T07:03:00.576797",
11
  "exception": false,
12
- "start_time": "2023-10-11T07:03:00.572318",
13
  "status": "completed"
14
  },
15
  "tags": []
@@ -23,13 +23,13 @@
23
  {
24
  "attachments": {},
25
  "cell_type": "markdown",
26
- "id": "5202003b",
27
  "metadata": {
28
  "papermill": {
29
- "duration": 0.002433,
30
- "end_time": "2023-10-11T07:03:00.582179",
31
  "exception": false,
32
- "start_time": "2023-10-11T07:03:00.579746",
33
  "status": "completed"
34
  },
35
  "tags": []
@@ -41,19 +41,19 @@
41
  {
42
  "cell_type": "code",
43
  "execution_count": 1,
44
- "id": "fc416bc2",
45
  "metadata": {
46
  "execution": {
47
- "iopub.execute_input": "2023-10-11T07:03:00.589288Z",
48
- "iopub.status.busy": "2023-10-11T07:03:00.588784Z",
49
- "iopub.status.idle": "2023-10-11T07:03:01.346279Z",
50
- "shell.execute_reply": "2023-10-11T07:03:01.345465Z"
51
  },
52
  "papermill": {
53
- "duration": 0.763505,
54
- "end_time": "2023-10-11T07:03:01.348385",
55
  "exception": false,
56
- "start_time": "2023-10-11T07:03:00.584880",
57
  "status": "completed"
58
  },
59
  "tags": []
@@ -69,19 +69,19 @@
69
  {
70
  "cell_type": "code",
71
  "execution_count": 2,
72
- "id": "a9dea07f",
73
  "metadata": {
74
  "execution": {
75
- "iopub.execute_input": "2023-10-11T07:03:01.355823Z",
76
- "iopub.status.busy": "2023-10-11T07:03:01.355275Z",
77
- "iopub.status.idle": "2023-10-11T07:03:01.363208Z",
78
- "shell.execute_reply": "2023-10-11T07:03:01.362389Z"
79
  },
80
  "papermill": {
81
- "duration": 0.013626,
82
- "end_time": "2023-10-11T07:03:01.364896",
83
  "exception": false,
84
- "start_time": "2023-10-11T07:03:01.351270",
85
  "status": "completed"
86
  },
87
  "tags": []
@@ -139,19 +139,19 @@
139
  {
140
  "cell_type": "code",
141
  "execution_count": 3,
142
- "id": "bcb68665",
143
  "metadata": {
144
  "execution": {
145
- "iopub.execute_input": "2023-10-11T07:03:01.372738Z",
146
- "iopub.status.busy": "2023-10-11T07:03:01.371949Z",
147
- "iopub.status.idle": "2023-10-11T07:03:02.581837Z",
148
- "shell.execute_reply": "2023-10-11T07:03:02.580259Z"
149
  },
150
  "papermill": {
151
- "duration": 1.216253,
152
- "end_time": "2023-10-11T07:03:02.584117",
153
  "exception": false,
154
- "start_time": "2023-10-11T07:03:01.367864",
155
  "status": "completed"
156
  },
157
  "tags": []
@@ -161,9 +161,14 @@
161
  "name": "stdout",
162
  "output_type": "stream",
163
  "text": [
164
- "--2023-10-11 07:03:01-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/blob/main/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\r\n",
165
- "Resolving huggingface.co (huggingface.co)... 18.154.227.7, 18.154.227.87, 18.154.227.69, ...\r\n",
166
- "Connecting to huggingface.co (huggingface.co)|18.154.227.7|:443... connected.\r\n",
 
 
 
 
 
167
  "HTTP request sent, awaiting response... "
168
  ]
169
  },
@@ -172,14 +177,814 @@
172
  "output_type": "stream",
173
  "text": [
174
  "200 OK\r\n",
175
- "Length: 44360 (43K) [text/html]\r\n",
176
  "Saving to: ‘v5-L6-D2048-E0_01-split-2a.pth’\r\n",
177
  "\r\n",
178
  "\r",
179
- " v5-L6-D20 0%[ ] 0 --.-KB/s \r",
180
- "v5-L6-D2048-E0_01-s 100%[===================>] 43.32K --.-KB/s in 0.001s \r\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  "\r\n",
182
- "2023-10-11 07:03:01 (46.3 MB/s) - ‘v5-L6-D2048-E0_01-split-2a.pth’ saved [44360/44360]\r\n",
183
  "\r\n"
184
  ]
185
  },
@@ -187,51 +992,814 @@
187
  "name": "stdout",
188
  "output_type": "stream",
189
  "text": [
190
- "--2023-10-11 07:03:02-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/blob/main/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\r\n",
191
- "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.7, 18.154.227.69, ...\r\n",
192
- "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n",
193
- "HTTP request sent, awaiting response... "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  ]
195
  },
196
  {
197
  "name": "stdout",
198
  "output_type": "stream",
199
  "text": [
200
- "200 OK\r\n",
201
- "Length: 44360 (43K) [text/html]\r\n",
202
- "Saving to: ‘v5-L6-D2048-E0_01-split-2b.pth’\r\n",
203
- "\r\n",
204
  "\r",
205
- " v5-L6-D20 0%[ ] 0 --.-KB/s \r",
206
- "v5-L6-D2048-E0_01-s 100%[===================>] 43.32K --.-KB/s in 0s \r\n",
207
  "\r\n",
208
- "2023-10-11 07:03:02 (215 MB/s) - ‘v5-L6-D2048-E0_01-split-2b.pth’ saved [44360/44360]\r\n",
209
  "\r\n"
210
  ]
211
  }
212
  ],
213
  "source": [
214
  "# Get the init split model, and finetune from there\n",
215
- "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/blob/main/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\"\n",
216
- "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/blob/main/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\""
217
  ]
218
  },
219
  {
220
  "cell_type": "code",
221
  "execution_count": 4,
222
- "id": "81e32bce",
223
  "metadata": {
224
  "execution": {
225
- "iopub.execute_input": "2023-10-11T07:03:02.594064Z",
226
- "iopub.status.busy": "2023-10-11T07:03:02.592869Z",
227
- "iopub.status.idle": "2023-10-11T07:03:12.284528Z",
228
- "shell.execute_reply": "2023-10-11T07:03:12.283195Z"
229
  },
230
  "papermill": {
231
- "duration": 9.699478,
232
- "end_time": "2023-10-11T07:03:12.286978",
233
  "exception": false,
234
- "start_time": "2023-10-11T07:03:02.587500",
235
  "status": "completed"
236
  },
237
  "tags": []
@@ -250,7 +1818,7 @@
250
  "output_type": "stream",
251
  "text": [
252
  "\r",
253
- "Saving the dataset (0/2 shards): 7%| | 2000/27200 [00:00<00:01, 16059.79 examp"
254
  ]
255
  },
256
  {
@@ -258,7 +1826,7 @@
258
  "output_type": "stream",
259
  "text": [
260
  "\r",
261
- "Saving the dataset (0/2 shards): 15%|▏| 4000/27200 [00:00<00:01, 17165.62 examp"
262
  ]
263
  },
264
  {
@@ -266,7 +1834,7 @@
266
  "output_type": "stream",
267
  "text": [
268
  "\r",
269
- "Saving the dataset (0/2 shards): 22%|▏| 6000/27200 [00:00<00:01, 17873.84 examp"
270
  ]
271
  },
272
  {
@@ -274,7 +1842,7 @@
274
  "output_type": "stream",
275
  "text": [
276
  "\r",
277
- "Saving the dataset (0/2 shards): 29%|▎| 8000/27200 [00:00<00:01, 18442.04 examp"
278
  ]
279
  },
280
  {
@@ -282,7 +1850,7 @@
282
  "output_type": "stream",
283
  "text": [
284
  "\r",
285
- "Saving the dataset (0/2 shards): 40%|▍| 11000/27200 [00:00<00:00, 19214.27 exam"
286
  ]
287
  },
288
  {
@@ -290,8 +1858,8 @@
290
  "output_type": "stream",
291
  "text": [
292
  "\r",
293
- "Saving the dataset (0/2 shards): 50%|▌| 13600/27200 [00:00<00:00, 19584.96 exam\r",
294
- "Saving the dataset (1/2 shards): 50%|▌| 13600/27200 [00:00<00:00, 19584.96 exam"
295
  ]
296
  },
297
  {
@@ -299,7 +1867,7 @@
299
  "output_type": "stream",
300
  "text": [
301
  "\r",
302
- "Saving the dataset (1/2 shards): 65%|▋| 17600/27200 [00:00<00:00, 20456.63 exam"
303
  ]
304
  },
305
  {
@@ -307,7 +1875,7 @@
307
  "output_type": "stream",
308
  "text": [
309
  "\r",
310
- "Saving the dataset (1/2 shards): 79%|▊| 21600/27200 [00:01<00:00, 21374.91 exam"
311
  ]
312
  },
313
  {
@@ -315,7 +1883,7 @@
315
  "output_type": "stream",
316
  "text": [
317
  "\r",
318
- "Saving the dataset (1/2 shards): 94%|▉| 25600/27200 [00:01<00:00, 21799.50 exam"
319
  ]
320
  },
321
  {
@@ -323,18 +1891,12 @@
323
  "output_type": "stream",
324
  "text": [
325
  "\r",
326
- "Saving the dataset (2/2 shards): 100%|█| 27200/27200 [00:01<00:00, 21799.50 exam\r",
327
- "Saving the dataset (2/2 shards): 100%|█| 27200/27200 [00:01<00:00, 20461.58 exam\r\n"
328
- ]
329
- },
330
- {
331
- "name": "stdout",
332
- "output_type": "stream",
333
- "text": [
334
  "\r",
335
  "Saving the dataset (0/1 shards): 0%| | 0/109 [00:00<?, ? examples/s]\r",
336
- "Saving the dataset (1/1 shards): 100%|█| 109/109 [00:00<00:00, 7981.06 examples/\r",
337
- "Saving the dataset (1/1 shards): 100%|█| 109/109 [00:00<00:00, 7683.94 examples/\r\n"
338
  ]
339
  }
340
  ],
@@ -346,13 +1908,13 @@
346
  },
347
  {
348
  "cell_type": "markdown",
349
- "id": "5bf12f9e",
350
  "metadata": {
351
  "papermill": {
352
- "duration": 0.00368,
353
- "end_time": "2023-10-11T07:03:12.294651",
354
  "exception": false,
355
- "start_time": "2023-10-11T07:03:12.290971",
356
  "status": "completed"
357
  },
358
  "tags": []
@@ -364,19 +1926,19 @@
364
  {
365
  "cell_type": "code",
366
  "execution_count": 5,
367
- "id": "c1566af8",
368
  "metadata": {
369
  "execution": {
370
- "iopub.execute_input": "2023-10-11T07:03:12.305523Z",
371
- "iopub.status.busy": "2023-10-11T07:03:12.304583Z",
372
- "iopub.status.idle": "2023-10-11T07:03:29.051517Z",
373
- "shell.execute_reply": "2023-10-11T07:03:29.049989Z"
374
  },
375
  "papermill": {
376
- "duration": 16.755429,
377
- "end_time": "2023-10-11T07:03:29.053992",
378
  "exception": false,
379
- "start_time": "2023-10-11T07:03:12.298563",
380
  "status": "completed"
381
  },
382
  "tags": []
@@ -386,7 +1948,7 @@
386
  "name": "stdout",
387
  "output_type": "stream",
388
  "text": [
389
- "[2023-10-11 07:03:16,508] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
390
  ]
391
  },
392
  {
@@ -408,9 +1970,9 @@
408
  "name": "stdout",
409
  "output_type": "stream",
410
  "text": [
411
- "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 3015291597\r\n",
412
  " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
413
- "Global seed set to 3015291597\r\n"
414
  ]
415
  },
416
  {
@@ -425,11 +1987,11 @@
425
  "output_type": "stream",
426
  "text": [
427
  "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n",
428
- "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_070319-yi5f0p8v\u001b[0m\r\n",
429
  "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
430
  "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
431
  "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n",
432
- "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/yi5f0p8v\u001b[0m\r\n"
433
  ]
434
  },
435
  {
@@ -471,10 +2033,10 @@
471
  "name": "stdout",
472
  "output_type": "stream",
473
  "text": [
474
- "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/yi5f0p8v\u001b[0m\r\n",
475
- "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v15\u001b[0m\r\n",
476
  "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
477
- "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_070319-yi5f0p8v/logs\u001b[0m\r\n"
478
  ]
479
  }
480
  ],
@@ -496,19 +2058,19 @@
496
  {
497
  "cell_type": "code",
498
  "execution_count": 6,
499
- "id": "d56d6a30",
500
  "metadata": {
501
  "execution": {
502
- "iopub.execute_input": "2023-10-11T07:03:29.066944Z",
503
- "iopub.status.busy": "2023-10-11T07:03:29.065449Z",
504
- "iopub.status.idle": "2023-10-11T07:03:32.793675Z",
505
- "shell.execute_reply": "2023-10-11T07:03:32.792045Z"
506
  },
507
  "papermill": {
508
- "duration": 3.737581,
509
- "end_time": "2023-10-11T07:03:32.796266",
510
  "exception": false,
511
- "start_time": "2023-10-11T07:03:29.058685",
512
  "status": "completed"
513
  },
514
  "tags": []
@@ -518,7 +2080,7 @@
518
  "name": "stdout",
519
  "output_type": "stream",
520
  "text": [
521
- "[2023-10-11 07:03:31,388] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
522
  ]
523
  },
524
  {
@@ -553,19 +2115,19 @@
553
  {
554
  "cell_type": "code",
555
  "execution_count": 7,
556
- "id": "f4c37d28",
557
  "metadata": {
558
  "execution": {
559
- "iopub.execute_input": "2023-10-11T07:03:32.809297Z",
560
- "iopub.status.busy": "2023-10-11T07:03:32.808263Z",
561
- "iopub.status.idle": "2023-10-11T07:03:39.200791Z",
562
- "shell.execute_reply": "2023-10-11T07:03:39.199726Z"
563
  },
564
  "papermill": {
565
- "duration": 6.402526,
566
- "end_time": "2023-10-11T07:03:39.203696",
567
  "exception": false,
568
- "start_time": "2023-10-11T07:03:32.801170",
569
  "status": "completed"
570
  },
571
  "tags": []
@@ -575,14 +2137,20 @@
575
  "name": "stdout",
576
  "output_type": "stream",
577
  "text": [
578
- "[2023-10-11 07:03:37,133] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
 
 
 
 
 
 
 
579
  ]
580
  },
581
  {
582
  "name": "stdout",
583
  "output_type": "stream",
584
  "text": [
585
- "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
586
  "Traceback (most recent call last):\r\n",
587
  " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in <module>\r\n",
588
  " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n",
@@ -602,13 +2170,13 @@
602
  },
603
  {
604
  "cell_type": "markdown",
605
- "id": "39b5c9e4",
606
  "metadata": {
607
  "papermill": {
608
- "duration": 0.006891,
609
- "end_time": "2023-10-11T07:03:39.217738",
610
  "exception": false,
611
- "start_time": "2023-10-11T07:03:39.210847",
612
  "status": "completed"
613
  },
614
  "tags": []
@@ -620,19 +2188,19 @@
620
  {
621
  "cell_type": "code",
622
  "execution_count": 8,
623
- "id": "6a2803dc",
624
  "metadata": {
625
  "execution": {
626
- "iopub.execute_input": "2023-10-11T07:03:39.235541Z",
627
- "iopub.status.busy": "2023-10-11T07:03:39.234393Z",
628
- "iopub.status.idle": "2023-10-11T07:03:54.515077Z",
629
- "shell.execute_reply": "2023-10-11T07:03:54.513988Z"
630
  },
631
  "papermill": {
632
- "duration": 15.292458,
633
- "end_time": "2023-10-11T07:03:54.517573",
634
  "exception": false,
635
- "start_time": "2023-10-11T07:03:39.225115",
636
  "status": "completed"
637
  },
638
  "tags": []
@@ -642,7 +2210,7 @@
642
  "name": "stdout",
643
  "output_type": "stream",
644
  "text": [
645
- "[2023-10-11 07:03:43,703] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
646
  ]
647
  },
648
  {
@@ -664,9 +2232,9 @@
664
  "name": "stdout",
665
  "output_type": "stream",
666
  "text": [
667
- "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 736269213\r\n",
668
  " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
669
- "Global seed set to 736269213\r\n"
670
  ]
671
  },
672
  {
@@ -681,11 +2249,11 @@
681
  "output_type": "stream",
682
  "text": [
683
  "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n",
684
- "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_070346-9urskvqu\u001b[0m\r\n",
685
  "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
686
  "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
687
  "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n",
688
- "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/9urskvqu\u001b[0m\r\n"
689
  ]
690
  },
691
  {
@@ -727,10 +2295,10 @@
727
  "name": "stdout",
728
  "output_type": "stream",
729
  "text": [
730
- "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/9urskvqu\u001b[0m\r\n",
731
- "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v15\u001b[0m\r\n",
732
  "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
733
- "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_070346-9urskvqu/logs\u001b[0m\r\n"
734
  ]
735
  }
736
  ],
@@ -752,19 +2320,19 @@
752
  {
753
  "cell_type": "code",
754
  "execution_count": 9,
755
- "id": "9f50e589",
756
  "metadata": {
757
  "execution": {
758
- "iopub.execute_input": "2023-10-11T07:03:54.533944Z",
759
- "iopub.status.busy": "2023-10-11T07:03:54.532500Z",
760
- "iopub.status.idle": "2023-10-11T07:03:58.532096Z",
761
- "shell.execute_reply": "2023-10-11T07:03:58.530934Z"
762
  },
763
  "papermill": {
764
- "duration": 4.010668,
765
- "end_time": "2023-10-11T07:03:58.534788",
766
  "exception": false,
767
- "start_time": "2023-10-11T07:03:54.524120",
768
  "status": "completed"
769
  },
770
  "tags": []
@@ -774,7 +2342,7 @@
774
  "name": "stdout",
775
  "output_type": "stream",
776
  "text": [
777
- "[2023-10-11 07:03:57,001] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
778
  ]
779
  },
780
  {
@@ -809,19 +2377,19 @@
809
  {
810
  "cell_type": "code",
811
  "execution_count": 10,
812
- "id": "777a63eb",
813
  "metadata": {
814
  "execution": {
815
- "iopub.execute_input": "2023-10-11T07:03:58.550672Z",
816
- "iopub.status.busy": "2023-10-11T07:03:58.550018Z",
817
- "iopub.status.idle": "2023-10-11T07:04:05.053290Z",
818
- "shell.execute_reply": "2023-10-11T07:04:05.052230Z"
819
  },
820
  "papermill": {
821
- "duration": 6.514677,
822
- "end_time": "2023-10-11T07:04:05.056026",
823
  "exception": false,
824
- "start_time": "2023-10-11T07:03:58.541349",
825
  "status": "completed"
826
  },
827
  "tags": []
@@ -831,7 +2399,7 @@
831
  "name": "stdout",
832
  "output_type": "stream",
833
  "text": [
834
- "[2023-10-11 07:04:02,965] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
835
  ]
836
  },
837
  {
@@ -877,14 +2445,14 @@
877
  },
878
  "papermill": {
879
  "default_parameters": {},
880
- "duration": 66.115721,
881
- "end_time": "2023-10-11T07:04:05.485131",
882
  "environment_variables": {},
883
  "exception": null,
884
  "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb",
885
  "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb",
886
  "parameters": {},
887
- "start_time": "2023-10-11T07:02:59.369410",
888
  "version": "2.4.0"
889
  }
890
  },
 
3
  {
4
  "attachments": {},
5
  "cell_type": "markdown",
6
+ "id": "d3126ef2",
7
  "metadata": {
8
  "papermill": {
9
+ "duration": 0.004879,
10
+ "end_time": "2023-10-11T08:02:23.608034",
11
  "exception": false,
12
+ "start_time": "2023-10-11T08:02:23.603155",
13
  "status": "completed"
14
  },
15
  "tags": []
 
23
  {
24
  "attachments": {},
25
  "cell_type": "markdown",
26
+ "id": "986070aa",
27
  "metadata": {
28
  "papermill": {
29
+ "duration": 0.002523,
30
+ "end_time": "2023-10-11T08:02:23.613605",
31
  "exception": false,
32
+ "start_time": "2023-10-11T08:02:23.611082",
33
  "status": "completed"
34
  },
35
  "tags": []
 
41
  {
42
  "cell_type": "code",
43
  "execution_count": 1,
44
+ "id": "dc924c7f",
45
  "metadata": {
46
  "execution": {
47
+ "iopub.execute_input": "2023-10-11T08:02:23.620990Z",
48
+ "iopub.status.busy": "2023-10-11T08:02:23.620432Z",
49
+ "iopub.status.idle": "2023-10-11T08:02:24.379549Z",
50
+ "shell.execute_reply": "2023-10-11T08:02:24.378580Z"
51
  },
52
  "papermill": {
53
+ "duration": 0.765369,
54
+ "end_time": "2023-10-11T08:02:24.381741",
55
  "exception": false,
56
+ "start_time": "2023-10-11T08:02:23.616372",
57
  "status": "completed"
58
  },
59
  "tags": []
 
69
  {
70
  "cell_type": "code",
71
  "execution_count": 2,
72
+ "id": "2bbc32ac",
73
  "metadata": {
74
  "execution": {
75
+ "iopub.execute_input": "2023-10-11T08:02:24.389788Z",
76
+ "iopub.status.busy": "2023-10-11T08:02:24.389227Z",
77
+ "iopub.status.idle": "2023-10-11T08:02:24.398441Z",
78
+ "shell.execute_reply": "2023-10-11T08:02:24.397578Z"
79
  },
80
  "papermill": {
81
+ "duration": 0.015548,
82
+ "end_time": "2023-10-11T08:02:24.400362",
83
  "exception": false,
84
+ "start_time": "2023-10-11T08:02:24.384814",
85
  "status": "completed"
86
  },
87
  "tags": []
 
139
  {
140
  "cell_type": "code",
141
  "execution_count": 3,
142
+ "id": "ffa69634",
143
  "metadata": {
144
  "execution": {
145
+ "iopub.execute_input": "2023-10-11T08:02:24.408311Z",
146
+ "iopub.status.busy": "2023-10-11T08:02:24.407798Z",
147
+ "iopub.status.idle": "2023-10-11T08:03:19.634663Z",
148
+ "shell.execute_reply": "2023-10-11T08:03:19.633765Z"
149
  },
150
  "papermill": {
151
+ "duration": 55.233419,
152
+ "end_time": "2023-10-11T08:03:19.636895",
153
  "exception": false,
154
+ "start_time": "2023-10-11T08:02:24.403476",
155
  "status": "completed"
156
  },
157
  "tags": []
 
161
  "name": "stdout",
162
  "output_type": "stream",
163
  "text": [
164
+ "--2023-10-11 08:02:24-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\r\n",
165
+ "Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.7, 18.154.227.69, ...\r\n",
166
+ "Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.\r\n",
167
+ "HTTP request sent, awaiting response... 302 Found\r\n",
168
+ "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
169
+ "--2023-10-11 08:02:24-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/2f52085cee9c3db4bb079dc44edf50b0a19c170bd92128e918e6203efef83cea?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2a.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2a.pth%22%3B&Expires=1697270544&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU0NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzJmNTIwODVjZWU5YzNkYjRiYjA3OWRjNDRlZGY1MGIwYTE5YzE3MGJkOTIxMjhlOTE4ZTYyMDNlZmVmODNjZWE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AW451jyDioqxesXvDVp%7EgfYV3uhgFTDwTn3SlZa-gk-yCDb7c-QR44rTm9sWCGSJjaa%7EvJvj9zLGUK7fvbr%7E%7EGQJgL2L%7Es9vkVPg8qs1k%7EtCh-MX%7E45bxo4CapTIo8fx4xLJ738Tks8uzpx3Sy9hWbfuGQFCUwBHzJXG5uGNRzPv87Zdfy4gIIAt0NytaC3bFmKZl4DbXLF4%7EtVWXED7H3NAlBvGETdhjzK5Qr0FLZB2vqC1LQpPTexdTH-ETkPEIQpXRBV-JctzaKBfI1Da-tGpt4JdPlhyPIu1kaNtX13yTibuBrT-mDOy6OVJZ9Zsj%7EHdVUtDrdp-I01dhylHpQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
170
+ "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.69, 3.162.112.2, 3.162.112.100, ...\r\n",
171
+ "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.69|:443... connected.\r\n",
172
  "HTTP request sent, awaiting response... "
173
  ]
174
  },
 
177
  "output_type": "stream",
178
  "text": [
179
  "200 OK\r\n",
180
+ "Length: 1066536657 (1017M) [binary/octet-stream]\r\n",
181
  "Saving to: ‘v5-L6-D2048-E0_01-split-2a.pth’\r\n",
182
  "\r\n",
183
  "\r",
184
+ " v5-L6-D20 0%[ ] 0 --.-KB/s "
185
+ ]
186
+ },
187
+ {
188
+ "name": "stdout",
189
+ "output_type": "stream",
190
+ "text": [
191
+ "\r",
192
+ " v5-L6-D204 1%[ ] 15.26M 42.9MB/s "
193
+ ]
194
+ },
195
+ {
196
+ "name": "stdout",
197
+ "output_type": "stream",
198
+ "text": [
199
+ "\r",
200
+ " v5-L6-D2048 3%[ ] 30.52M 47.7MB/s "
201
+ ]
202
+ },
203
+ {
204
+ "name": "stdout",
205
+ "output_type": "stream",
206
+ "text": [
207
+ "\r",
208
+ " v5-L6-D2048- 4%[ ] 45.26M 51.5MB/s "
209
+ ]
210
+ },
211
+ {
212
+ "name": "stdout",
213
+ "output_type": "stream",
214
+ "text": [
215
+ "\r",
216
+ " v5-L6-D2048-E 5%[> ] 59.20M 52.1MB/s "
217
+ ]
218
+ },
219
+ {
220
+ "name": "stdout",
221
+ "output_type": "stream",
222
+ "text": [
223
+ "\r",
224
+ " v5-L6-D2048-E0 6%[> ] 65.20M 48.8MB/s "
225
+ ]
226
+ },
227
+ {
228
+ "name": "stdout",
229
+ "output_type": "stream",
230
+ "text": [
231
+ "\r",
232
+ " v5-L6-D2048-E0_ 7%[> ] 76.29M 44.4MB/s "
233
+ ]
234
+ },
235
+ {
236
+ "name": "stdout",
237
+ "output_type": "stream",
238
+ "text": [
239
+ "\r",
240
+ " v5-L6-D2048-E0_0 8%[> ] 91.03M 47.2MB/s "
241
+ ]
242
+ },
243
+ {
244
+ "name": "stdout",
245
+ "output_type": "stream",
246
+ "text": [
247
+ "\r",
248
+ " v5-L6-D2048-E0_01 9%[> ] 91.55M 43.0MB/s "
249
+ ]
250
+ },
251
+ {
252
+ "name": "stdout",
253
+ "output_type": "stream",
254
+ "text": [
255
+ "\r",
256
+ " v5-L6-D2048-E0_01- 10%[=> ] 106.81M 43.2MB/s "
257
+ ]
258
+ },
259
+ {
260
+ "name": "stdout",
261
+ "output_type": "stream",
262
+ "text": [
263
+ "\r",
264
+ "v5-L6-D2048-E0_01-s 11%[=> ] 120.25M 43.9MB/s "
265
+ ]
266
+ },
267
+ {
268
+ "name": "stdout",
269
+ "output_type": "stream",
270
+ "text": [
271
+ "\r",
272
+ "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 41.4MB/s "
273
+ ]
274
+ },
275
+ {
276
+ "name": "stdout",
277
+ "output_type": "stream",
278
+ "text": [
279
+ "\r",
280
+ "-L6-D2048-E0_01-spl 13%[=> ] 136.81M 42.5MB/s eta 21s "
281
+ ]
282
+ },
283
+ {
284
+ "name": "stdout",
285
+ "output_type": "stream",
286
+ "text": [
287
+ "\r",
288
+ "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.8MB/s eta 21s "
289
+ ]
290
+ },
291
+ {
292
+ "name": "stdout",
293
+ "output_type": "stream",
294
+ "text": [
295
+ "\r",
296
+ "6-D2048-E0_01-split 15%[==> ] 152.72M 40.5MB/s eta 21s "
297
+ ]
298
+ },
299
+ {
300
+ "name": "stdout",
301
+ "output_type": "stream",
302
+ "text": [
303
+ "\r",
304
+ "-D2048-E0_01-split- 16%[==> ] 167.85M 41.5MB/s eta 21s "
305
+ ]
306
+ },
307
+ {
308
+ "name": "stdout",
309
+ "output_type": "stream",
310
+ "text": [
311
+ "\r",
312
+ "D2048-E0_01-split-2 18%[==> ] 183.10M 43.0MB/s eta 19s "
313
+ ]
314
+ },
315
+ {
316
+ "name": "stdout",
317
+ "output_type": "stream",
318
+ "text": [
319
+ "\r",
320
+ "2048-E0_01-split-2a 19%[==> ] 198.36M 43.4MB/s eta 19s "
321
+ ]
322
+ },
323
+ {
324
+ "name": "stdout",
325
+ "output_type": "stream",
326
+ "text": [
327
+ "\r",
328
+ "048-E0_01-split-2a. 20%[===> ] 213.11M 44.1MB/s eta 19s "
329
+ ]
330
+ },
331
+ {
332
+ "name": "stdout",
333
+ "output_type": "stream",
334
+ "text": [
335
+ "\r",
336
+ "48-E0_01-split-2a.p 22%[===> ] 228.36M 43.3MB/s eta 19s "
337
+ ]
338
+ },
339
+ {
340
+ "name": "stdout",
341
+ "output_type": "stream",
342
+ "text": [
343
+ "\r",
344
+ "8-E0_01-split-2a.pt 22%[===> ] 228.87M 41.1MB/s eta 18s "
345
+ ]
346
+ },
347
+ {
348
+ "name": "stdout",
349
+ "output_type": "stream",
350
+ "text": [
351
+ "\r",
352
+ "-E0_01-split-2a.pth 24%[===> ] 244.13M 41.0MB/s eta 18s "
353
+ ]
354
+ },
355
+ {
356
+ "name": "stdout",
357
+ "output_type": "stream",
358
+ "text": [
359
+ "\r",
360
+ "E0_01-split-2a.pth 25%[====> ] 259.40M 42.4MB/s eta 18s "
361
+ ]
362
+ },
363
+ {
364
+ "name": "stdout",
365
+ "output_type": "stream",
366
+ "text": [
367
+ "\r",
368
+ "0_01-split-2a.pth 26%[====> ] 272.83M 40.4MB/s eta 18s "
369
+ ]
370
+ },
371
+ {
372
+ "name": "stdout",
373
+ "output_type": "stream",
374
+ "text": [
375
+ "\r",
376
+ "_01-split-2a.pth 28%[====> ] 289.40M 41.4MB/s eta 18s "
377
+ ]
378
+ },
379
+ {
380
+ "name": "stdout",
381
+ "output_type": "stream",
382
+ "text": [
383
+ "\r",
384
+ "01-split-2a.pth 28%[====> ] 289.92M 37.9MB/s eta 18s "
385
+ ]
386
+ },
387
+ {
388
+ "name": "stdout",
389
+ "output_type": "stream",
390
+ "text": [
391
+ "\r",
392
+ "1-split-2a.pth 29%[====> ] 304.66M 36.1MB/s eta 19s "
393
+ ]
394
+ },
395
+ {
396
+ "name": "stdout",
397
+ "output_type": "stream",
398
+ "text": [
399
+ "\r",
400
+ "-split-2a.pth 30%[=====> ] 305.18M 33.4MB/s eta 19s "
401
+ ]
402
+ },
403
+ {
404
+ "name": "stdout",
405
+ "output_type": "stream",
406
+ "text": [
407
+ "\r",
408
+ "split-2a.pth 31%[=====> ] 318.60M 33.2MB/s eta 19s "
409
+ ]
410
+ },
411
+ {
412
+ "name": "stdout",
413
+ "output_type": "stream",
414
+ "text": [
415
+ "\r",
416
+ "plit-2a.pth 31%[=====> ] 320.29M 33.3MB/s eta 19s "
417
+ ]
418
+ },
419
+ {
420
+ "name": "stdout",
421
+ "output_type": "stream",
422
+ "text": [
423
+ "\r",
424
+ "lit-2a.pth 31%[=====> ] 320.57M 30.7MB/s eta 19s "
425
+ ]
426
+ },
427
+ {
428
+ "name": "stdout",
429
+ "output_type": "stream",
430
+ "text": [
431
+ "\r",
432
+ "it-2a.pth 32%[=====> ] 335.18M 30.1MB/s eta 19s "
433
+ ]
434
+ },
435
+ {
436
+ "name": "stdout",
437
+ "output_type": "stream",
438
+ "text": [
439
+ "\r",
440
+ "t-2a.pth 33%[=====> ] 345.53M 31.2MB/s eta 19s "
441
+ ]
442
+ },
443
+ {
444
+ "name": "stdout",
445
+ "output_type": "stream",
446
+ "text": [
447
+ "\r",
448
+ "-2a.pth 34%[=====> ] 350.82M 29.7MB/s eta 19s "
449
+ ]
450
+ },
451
+ {
452
+ "name": "stdout",
453
+ "output_type": "stream",
454
+ "text": [
455
+ "\r",
456
+ "2a.pth 35%[======> ] 360.98M 31.3MB/s eta 19s "
457
+ ]
458
+ },
459
+ {
460
+ "name": "stdout",
461
+ "output_type": "stream",
462
+ "text": [
463
+ "\r",
464
+ "a.pth 36%[======> ] 366.20M 29.6MB/s eta 19s "
465
+ ]
466
+ },
467
+ {
468
+ "name": "stdout",
469
+ "output_type": "stream",
470
+ "text": [
471
+ "\r",
472
+ ".pth 37%[======> ] 380.96M 30.8MB/s eta 17s "
473
+ ]
474
+ },
475
+ {
476
+ "name": "stdout",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "\r",
480
+ "pth 38%[======> ] 392.79M 32.0MB/s eta 17s "
481
+ ]
482
+ },
483
+ {
484
+ "name": "stdout",
485
+ "output_type": "stream",
486
+ "text": [
487
+ "\r",
488
+ "th 39%[======> ] 396.73M 29.1MB/s eta 17s "
489
+ ]
490
+ },
491
+ {
492
+ "name": "stdout",
493
+ "output_type": "stream",
494
+ "text": [
495
+ "\r",
496
+ "h 40%[=======> ] 411.99M 29.1MB/s eta 17s "
497
+ ]
498
+ },
499
+ {
500
+ "name": "stdout",
501
+ "output_type": "stream",
502
+ "text": [
503
+ "\r",
504
+ " 41%[=======> ] 426.73M 28.7MB/s eta 16s "
505
+ ]
506
+ },
507
+ {
508
+ "name": "stdout",
509
+ "output_type": "stream",
510
+ "text": [
511
+ "\r",
512
+ " v 42%[=======> ] 427.25M 29.1MB/s eta 16s "
513
+ ]
514
+ },
515
+ {
516
+ "name": "stdout",
517
+ "output_type": "stream",
518
+ "text": [
519
+ "\r",
520
+ " v5 42%[=======> ] 435.25M 27.9MB/s eta 16s "
521
+ ]
522
+ },
523
+ {
524
+ "name": "stdout",
525
+ "output_type": "stream",
526
+ "text": [
527
+ "\r",
528
+ " v5- 43%[=======> ] 438.04M 28.2MB/s eta 16s "
529
+ ]
530
+ },
531
+ {
532
+ "name": "stdout",
533
+ "output_type": "stream",
534
+ "text": [
535
+ "\r",
536
+ " v5-L 43%[=======> ] 442.05M 29.7MB/s eta 16s "
537
+ ]
538
+ },
539
+ {
540
+ "name": "stdout",
541
+ "output_type": "stream",
542
+ "text": [
543
+ "\r",
544
+ " v5-L6 43%[=======> ] 446.00M 31.1MB/s eta 16s "
545
+ ]
546
+ },
547
+ {
548
+ "name": "stdout",
549
+ "output_type": "stream",
550
+ "text": [
551
+ "\r",
552
+ " v5-L6- 44%[=======> ] 457.24M 33.9MB/s eta 16s "
553
+ ]
554
+ },
555
+ {
556
+ "name": "stdout",
557
+ "output_type": "stream",
558
+ "text": [
559
+ "\r",
560
+ " v5-L6-D 45%[========> ] 457.89M 31.6MB/s eta 16s "
561
+ ]
562
+ },
563
+ {
564
+ "name": "stdout",
565
+ "output_type": "stream",
566
+ "text": [
567
+ "\r",
568
+ " v5-L6-D2 46%[========> ] 473.02M 34.8MB/s eta 16s "
569
+ ]
570
+ },
571
+ {
572
+ "name": "stdout",
573
+ "output_type": "stream",
574
+ "text": [
575
+ "\r",
576
+ " v5-L6-D20 48%[========> ] 488.28M 34.1MB/s eta 15s "
577
+ ]
578
+ },
579
+ {
580
+ "name": "stdout",
581
+ "output_type": "stream",
582
+ "text": [
583
+ "\r",
584
+ " v5-L6-D204 49%[========> ] 503.03M 34.6MB/s eta 15s "
585
+ ]
586
+ },
587
+ {
588
+ "name": "stdout",
589
+ "output_type": "stream",
590
+ "text": [
591
+ "\r",
592
+ " v5-L6-D2048 50%[=========> ] 518.29M 37.3MB/s eta 15s "
593
+ ]
594
+ },
595
+ {
596
+ "name": "stdout",
597
+ "output_type": "stream",
598
+ "text": [
599
+ "\r",
600
+ " v5-L6-D2048- 51%[=========> ] 525.10M 35.8MB/s eta 15s "
601
+ ]
602
+ },
603
+ {
604
+ "name": "stdout",
605
+ "output_type": "stream",
606
+ "text": [
607
+ "\r",
608
+ " v5-L6-D2048-E 52%[=========> ] 534.05M 34.4MB/s eta 13s "
609
+ ]
610
+ },
611
+ {
612
+ "name": "stdout",
613
+ "output_type": "stream",
614
+ "text": [
615
+ "\r",
616
+ " v5-L6-D2048-E0 53%[=========> ] 548.80M 34.4MB/s eta 13s "
617
+ ]
618
+ },
619
+ {
620
+ "name": "stdout",
621
+ "output_type": "stream",
622
+ "text": [
623
+ "\r",
624
+ " v5-L6-D2048-E0_ 55%[==========> ] 562.75M 33.8MB/s eta 13s "
625
+ ]
626
+ },
627
+ {
628
+ "name": "stdout",
629
+ "output_type": "stream",
630
+ "text": [
631
+ "\r",
632
+ " v5-L6-D2048-E0_0 56%[==========> ] 579.31M 36.0MB/s eta 13s "
633
+ ]
634
+ },
635
+ {
636
+ "name": "stdout",
637
+ "output_type": "stream",
638
+ "text": [
639
+ "\r",
640
+ " v5-L6-D2048-E0_01 57%[==========> ] 581.49M 36.7MB/s eta 12s "
641
+ ]
642
+ },
643
+ {
644
+ "name": "stdout",
645
+ "output_type": "stream",
646
+ "text": [
647
+ "\r",
648
+ " v5-L6-D2048-E0_01- 58%[==========> ] 592.93M 37.4MB/s eta 12s "
649
+ ]
650
+ },
651
+ {
652
+ "name": "stdout",
653
+ "output_type": "stream",
654
+ "text": [
655
+ "\r",
656
+ "v5-L6-D2048-E0_01-s 58%[==========> ] 595.09M 37.1MB/s eta 12s "
657
+ ]
658
+ },
659
+ {
660
+ "name": "stdout",
661
+ "output_type": "stream",
662
+ "text": [
663
+ "\r",
664
+ "5-L6-D2048-E0_01-sp 60%[===========> ] 610.35M 38.5MB/s eta 12s "
665
+ ]
666
+ },
667
+ {
668
+ "name": "stdout",
669
+ "output_type": "stream",
670
+ "text": [
671
+ "\r",
672
+ "-L6-D2048-E0_01-spl 61%[===========> ] 625.61M 38.7MB/s eta 11s "
673
+ ]
674
+ },
675
+ {
676
+ "name": "stdout",
677
+ "output_type": "stream",
678
+ "text": [
679
+ "\r",
680
+ "L6-D2048-E0_01-spli 62%[===========> ] 640.36M 39.9MB/s eta 11s "
681
+ ]
682
+ },
683
+ {
684
+ "name": "stdout",
685
+ "output_type": "stream",
686
+ "text": [
687
+ "\r",
688
+ "6-D2048-E0_01-split 64%[===========> ] 653.30M 39.5MB/s eta 11s "
689
+ ]
690
+ },
691
+ {
692
+ "name": "stdout",
693
+ "output_type": "stream",
694
+ "text": [
695
+ "\r",
696
+ "-D2048-E0_01-split- 64%[===========> ] 656.13M 38.5MB/s eta 11s "
697
+ ]
698
+ },
699
+ {
700
+ "name": "stdout",
701
+ "output_type": "stream",
702
+ "text": [
703
+ "\r",
704
+ "D2048-E0_01-split-2 66%[============> ] 671.38M 38.9MB/s eta 9s "
705
+ ]
706
+ },
707
+ {
708
+ "name": "stdout",
709
+ "output_type": "stream",
710
+ "text": [
711
+ "\r",
712
+ "2048-E0_01-split-2a 67%[============> ] 685.57M 39.7MB/s eta 9s "
713
+ ]
714
+ },
715
+ {
716
+ "name": "stdout",
717
+ "output_type": "stream",
718
+ "text": [
719
+ "\r",
720
+ "048-E0_01-split-2a. 67%[============> ] 686.64M 37.5MB/s eta 9s "
721
+ ]
722
+ },
723
+ {
724
+ "name": "stdout",
725
+ "output_type": "stream",
726
+ "text": [
727
+ "\r",
728
+ "48-E0_01-split-2a.p 68%[============> ] 701.39M 37.9MB/s eta 9s "
729
+ ]
730
+ },
731
+ {
732
+ "name": "stdout",
733
+ "output_type": "stream",
734
+ "text": [
735
+ "\r",
736
+ "8-E0_01-split-2a.pt 69%[============> ] 708.59M 38.8MB/s eta 8s "
737
+ ]
738
+ },
739
+ {
740
+ "name": "stdout",
741
+ "output_type": "stream",
742
+ "text": [
743
+ "\r",
744
+ "-E0_01-split-2a.pth 70%[=============> ] 715.34M 38.2MB/s eta 8s "
745
+ ]
746
+ },
747
+ {
748
+ "name": "stdout",
749
+ "output_type": "stream",
750
+ "text": [
751
+ "\r",
752
+ "E0_01-split-2a.pth 71%[=============> ] 731.91M 40.7MB/s eta 8s "
753
+ ]
754
+ },
755
+ {
756
+ "name": "stdout",
757
+ "output_type": "stream",
758
+ "text": [
759
+ "\r",
760
+ "0_01-split-2a.pth 73%[=============> ] 747.17M 38.0MB/s eta 8s "
761
+ ]
762
+ },
763
+ {
764
+ "name": "stdout",
765
+ "output_type": "stream",
766
+ "text": [
767
+ "\r",
768
+ "_01-split-2a.pth 73%[=============> ] 747.75M 38.0MB/s eta 7s "
769
+ ]
770
+ },
771
+ {
772
+ "name": "stdout",
773
+ "output_type": "stream",
774
+ "text": [
775
+ "\r",
776
+ "01-split-2a.pth 74%[=============> ] 762.42M 40.2MB/s eta 7s "
777
+ ]
778
+ },
779
+ {
780
+ "name": "stdout",
781
+ "output_type": "stream",
782
+ "text": [
783
+ "\r",
784
+ "1-split-2a.pth 75%[==============> ] 762.94M 37.2MB/s eta 7s "
785
+ ]
786
+ },
787
+ {
788
+ "name": "stdout",
789
+ "output_type": "stream",
790
+ "text": [
791
+ "\r",
792
+ "-split-2a.pth 76%[==============> ] 776.37M 36.7MB/s eta 7s "
793
+ ]
794
+ },
795
+ {
796
+ "name": "stdout",
797
+ "output_type": "stream",
798
+ "text": [
799
+ "\r",
800
+ "split-2a.pth 76%[==============> ] 778.20M 34.9MB/s eta 7s "
801
+ ]
802
+ },
803
+ {
804
+ "name": "stdout",
805
+ "output_type": "stream",
806
+ "text": [
807
+ "\r",
808
+ "plit-2a.pth 77%[==============> ] 791.63M 38.1MB/s eta 7s "
809
+ ]
810
+ },
811
+ {
812
+ "name": "stdout",
813
+ "output_type": "stream",
814
+ "text": [
815
+ "\r",
816
+ "lit-2a.pth 78%[==============> ] 793.46M 36.0MB/s eta 7s "
817
+ ]
818
+ },
819
+ {
820
+ "name": "stdout",
821
+ "output_type": "stream",
822
+ "text": [
823
+ "\r",
824
+ "it-2a.pth 79%[==============> ] 808.20M 38.6MB/s eta 7s "
825
+ ]
826
+ },
827
+ {
828
+ "name": "stdout",
829
+ "output_type": "stream",
830
+ "text": [
831
+ "\r",
832
+ "t-2a.pth 80%[===============> ] 816.07M 36.7MB/s eta 7s "
833
+ ]
834
+ },
835
+ {
836
+ "name": "stdout",
837
+ "output_type": "stream",
838
+ "text": [
839
+ "\r",
840
+ "-2a.pth 81%[===============> ] 823.97M 34.7MB/s eta 5s "
841
+ ]
842
+ },
843
+ {
844
+ "name": "stdout",
845
+ "output_type": "stream",
846
+ "text": [
847
+ "\r",
848
+ "2a.pth 82%[===============> ] 837.41M 36.4MB/s eta 5s "
849
+ ]
850
+ },
851
+ {
852
+ "name": "stdout",
853
+ "output_type": "stream",
854
+ "text": [
855
+ "\r",
856
+ "a.pth 83%[===============> ] 853.98M 38.3MB/s eta 5s "
857
+ ]
858
+ },
859
+ {
860
+ "name": "stdout",
861
+ "output_type": "stream",
862
+ "text": [
863
+ "\r",
864
+ ".pth 85%[================> ] 867.67M 38.4MB/s eta 5s "
865
+ ]
866
+ },
867
+ {
868
+ "name": "stdout",
869
+ "output_type": "stream",
870
+ "text": [
871
+ "\r",
872
+ "pth 85%[================> ] 873.17M 39.1MB/s eta 5s "
873
+ ]
874
+ },
875
+ {
876
+ "name": "stdout",
877
+ "output_type": "stream",
878
+ "text": [
879
+ "\r",
880
+ "th 87%[================> ] 885.01M 36.1MB/s eta 4s "
881
+ ]
882
+ },
883
+ {
884
+ "name": "stdout",
885
+ "output_type": "stream",
886
+ "text": [
887
+ "\r",
888
+ "h 88%[================> ] 899.75M 37.6MB/s eta 4s "
889
+ ]
890
+ },
891
+ {
892
+ "name": "stdout",
893
+ "output_type": "stream",
894
+ "text": [
895
+ "\r",
896
+ " 88%[================> ] 900.40M 34.6MB/s eta 4s "
897
+ ]
898
+ },
899
+ {
900
+ "name": "stdout",
901
+ "output_type": "stream",
902
+ "text": [
903
+ "\r",
904
+ " v 90%[=================> ] 915.53M 35.4MB/s eta 4s "
905
+ ]
906
+ },
907
+ {
908
+ "name": "stdout",
909
+ "output_type": "stream",
910
+ "text": [
911
+ "\r",
912
+ " v5 91%[=================> ] 930.78M 37.6MB/s eta 2s "
913
+ ]
914
+ },
915
+ {
916
+ "name": "stdout",
917
+ "output_type": "stream",
918
+ "text": [
919
+ "\r",
920
+ " v5- 92%[=================> ] 945.53M 40.9MB/s eta 2s "
921
+ ]
922
+ },
923
+ {
924
+ "name": "stdout",
925
+ "output_type": "stream",
926
+ "text": [
927
+ "\r",
928
+ " v5-L 93%[=================> ] 946.04M 37.6MB/s eta 2s "
929
+ ]
930
+ },
931
+ {
932
+ "name": "stdout",
933
+ "output_type": "stream",
934
+ "text": [
935
+ "\r",
936
+ " v5-L6 94%[=================> ] 959.48M 38.0MB/s eta 2s "
937
+ ]
938
+ },
939
+ {
940
+ "name": "stdout",
941
+ "output_type": "stream",
942
+ "text": [
943
+ "\r",
944
+ " v5-L6- 94%[=================> ] 961.30M 33.1MB/s eta 2s "
945
+ ]
946
+ },
947
+ {
948
+ "name": "stdout",
949
+ "output_type": "stream",
950
+ "text": [
951
+ "\r",
952
+ " v5-L6-D 95%[==================> ] 976.05M 34.9MB/s eta 2s "
953
+ ]
954
+ },
955
+ {
956
+ "name": "stdout",
957
+ "output_type": "stream",
958
+ "text": [
959
+ "\r",
960
+ " v5-L6-D2 97%[==================> ] 991.31M 34.8MB/s eta 2s "
961
+ ]
962
+ },
963
+ {
964
+ "name": "stdout",
965
+ "output_type": "stream",
966
+ "text": [
967
+ "\r",
968
+ " v5-L6-D20 97%[==================> ] 992.94M 35.0MB/s eta 2s "
969
+ ]
970
+ },
971
+ {
972
+ "name": "stdout",
973
+ "output_type": "stream",
974
+ "text": [
975
+ "\r",
976
+ " v5-L6-D204 98%[==================> ] 1005M 34.7MB/s eta 0s "
977
+ ]
978
+ },
979
+ {
980
+ "name": "stdout",
981
+ "output_type": "stream",
982
+ "text": [
983
+ "\r",
984
+ " v5-L6-D2048 99%[==================> ] 1016M 33.7MB/s eta 0s \r",
985
+ "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 33.9MB/s in 28s \r\n",
986
  "\r\n",
987
+ "2023-10-11 08:02:52 (36.4 MB/s) - ‘v5-L6-D2048-E0_01-split-2a.pth’ saved [1066536657/1066536657]\r\n",
988
  "\r\n"
989
  ]
990
  },
 
992
  "name": "stdout",
993
  "output_type": "stream",
994
  "text": [
995
+ "--2023-10-11 08:02:53-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\r\n",
996
+ "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.7, ...\r\n",
997
+ "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n",
998
+ "HTTP request sent, awaiting response... 302 Found\r\n",
999
+ "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
1000
+ "--2023-10-11 08:02:53-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/6b64a1018631b9ddd15a746002bab3eafe956dced78a91af7abcdadaae4a7b25?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L6-D2048-E0_01-split-2b.pth%3B+filename%3D%22v5-L6-D2048-E0_01-split-2b.pth%22%3B&Expires=1697270573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NzI3MDU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzZiNjRhMTAxODYzMWI5ZGRkMTVhNzQ2MDAyYmFiM2VhZmU5NTZkY2VkNzhhOTFhZjdhYmNkYWRhYWU0YTdiMjU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=WkKE1KjbKeVQp4dWdBuAAbOfx2JJs%7EDJaKbx8gRQSGABLfGDhkq2L8Q9KZ1fg1v%7E74c0Mkrbvop33pAwQDh782jzEiogbDb8HXSO7AtIYQqvI6K-fmb%7EpxQPFrmypJwWhQj9ePRZX2KSL6LcqN1X0GAheI-PQENpVH3svxhhib2-fYDmuvnpGX7pc6n36GES6lvwOuCQOxfIhlFnIiuNEU00NaBdDiaXb-uteXhSkKO-1EFCM0fBtwT5hVkdHZQG2m6iMcI2KaN0AHV%7EvF838f4DM%7ERbjVkRgwphRaYZxmJxUKZxGTV7rRJjIQA%7EOlnPllE1dSdwJ7y0ULOIKQHYUQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
1001
+ "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 3.162.112.95, 3.162.112.100, 3.162.112.2, ...\r\n",
1002
+ "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|3.162.112.95|:443... connected.\r\n"
1003
+ ]
1004
+ },
1005
+ {
1006
+ "name": "stdout",
1007
+ "output_type": "stream",
1008
+ "text": [
1009
+ "HTTP request sent, awaiting response... "
1010
+ ]
1011
+ },
1012
+ {
1013
+ "name": "stdout",
1014
+ "output_type": "stream",
1015
+ "text": [
1016
+ "200 OK\r\n",
1017
+ "Length: 1066536657 (1017M) [binary/octet-stream]\r\n",
1018
+ "Saving to: ‘v5-L6-D2048-E0_01-split-2b.pth’\r\n",
1019
+ "\r\n",
1020
+ "\r",
1021
+ " v5-L6-D20 0%[ ] 0 --.-KB/s "
1022
+ ]
1023
+ },
1024
+ {
1025
+ "name": "stdout",
1026
+ "output_type": "stream",
1027
+ "text": [
1028
+ "\r",
1029
+ " v5-L6-D204 1%[ ] 14.74M 67.8MB/s "
1030
+ ]
1031
+ },
1032
+ {
1033
+ "name": "stdout",
1034
+ "output_type": "stream",
1035
+ "text": [
1036
+ "\r",
1037
+ " v5-L6-D2048 2%[ ] 28.69M 63.1MB/s "
1038
+ ]
1039
+ },
1040
+ {
1041
+ "name": "stdout",
1042
+ "output_type": "stream",
1043
+ "text": [
1044
+ "\r",
1045
+ " v5-L6-D2048- 3%[ ] 30.52M 42.3MB/s "
1046
+ ]
1047
+ },
1048
+ {
1049
+ "name": "stdout",
1050
+ "output_type": "stream",
1051
+ "text": [
1052
+ "\r",
1053
+ " v5-L6-D2048-E 4%[ ] 45.26M 45.2MB/s "
1054
+ ]
1055
+ },
1056
+ {
1057
+ "name": "stdout",
1058
+ "output_type": "stream",
1059
+ "text": [
1060
+ "\r",
1061
+ " v5-L6-D2048-E0 4%[ ] 45.78M 37.9MB/s "
1062
+ ]
1063
+ },
1064
+ {
1065
+ "name": "stdout",
1066
+ "output_type": "stream",
1067
+ "text": [
1068
+ "\r",
1069
+ " v5-L6-D2048-E0_ 6%[> ] 61.03M 41.2MB/s "
1070
+ ]
1071
+ },
1072
+ {
1073
+ "name": "stdout",
1074
+ "output_type": "stream",
1075
+ "text": [
1076
+ "\r",
1077
+ " v5-L6-D2048-E0_0 7%[> ] 75.78M 45.0MB/s "
1078
+ ]
1079
+ },
1080
+ {
1081
+ "name": "stdout",
1082
+ "output_type": "stream",
1083
+ "text": [
1084
+ "\r",
1085
+ " v5-L6-D2048-E0_01 8%[> ] 85.94M 45.6MB/s "
1086
+ ]
1087
+ },
1088
+ {
1089
+ "name": "stdout",
1090
+ "output_type": "stream",
1091
+ "text": [
1092
+ "\r",
1093
+ " v5-L6-D2048-E0_01- 9%[> ] 91.55M 40.8MB/s "
1094
+ ]
1095
+ },
1096
+ {
1097
+ "name": "stdout",
1098
+ "output_type": "stream",
1099
+ "text": [
1100
+ "\r",
1101
+ "v5-L6-D2048-E0_01-s 10%[=> ] 106.81M 40.5MB/s "
1102
+ ]
1103
+ },
1104
+ {
1105
+ "name": "stdout",
1106
+ "output_type": "stream",
1107
+ "text": [
1108
+ "\r",
1109
+ "5-L6-D2048-E0_01-sp 12%[=> ] 122.07M 40.2MB/s eta 22s "
1110
+ ]
1111
+ },
1112
+ {
1113
+ "name": "stdout",
1114
+ "output_type": "stream",
1115
+ "text": [
1116
+ "\r",
1117
+ "-L6-D2048-E0_01-spl 13%[=> ] 137.33M 41.7MB/s eta 22s "
1118
+ ]
1119
+ },
1120
+ {
1121
+ "name": "stdout",
1122
+ "output_type": "stream",
1123
+ "text": [
1124
+ "\r",
1125
+ "L6-D2048-E0_01-spli 14%[=> ] 152.07M 42.9MB/s eta 22s "
1126
+ ]
1127
+ },
1128
+ {
1129
+ "name": "stdout",
1130
+ "output_type": "stream",
1131
+ "text": [
1132
+ "\r",
1133
+ "6-D2048-E0_01-split 16%[==> ] 167.33M 43.6MB/s eta 22s "
1134
+ ]
1135
+ },
1136
+ {
1137
+ "name": "stdout",
1138
+ "output_type": "stream",
1139
+ "text": [
1140
+ "\r",
1141
+ "-D2048-E0_01-split- 17%[==> ] 181.32M 44.9MB/s eta 19s "
1142
+ ]
1143
+ },
1144
+ {
1145
+ "name": "stdout",
1146
+ "output_type": "stream",
1147
+ "text": [
1148
+ "\r",
1149
+ "D2048-E0_01-split-2 18%[==> ] 183.10M 41.7MB/s eta 19s "
1150
+ ]
1151
+ },
1152
+ {
1153
+ "name": "stdout",
1154
+ "output_type": "stream",
1155
+ "text": [
1156
+ "\r",
1157
+ "2048-E0_01-split-2b 19%[==> ] 196.53M 41.8MB/s eta 19s "
1158
+ ]
1159
+ },
1160
+ {
1161
+ "name": "stdout",
1162
+ "output_type": "stream",
1163
+ "text": [
1164
+ "\r",
1165
+ "048-E0_01-split-2b. 19%[==> ] 198.36M 39.0MB/s eta 19s "
1166
+ ]
1167
+ },
1168
+ {
1169
+ "name": "stdout",
1170
+ "output_type": "stream",
1171
+ "text": [
1172
+ "\r",
1173
+ "48-E0_01-split-2b.p 20%[===> ] 213.11M 39.0MB/s eta 20s "
1174
+ ]
1175
+ },
1176
+ {
1177
+ "name": "stdout",
1178
+ "output_type": "stream",
1179
+ "text": [
1180
+ "\r",
1181
+ "8-E0_01-split-2b.pt 21%[===> ] 220.29M 40.8MB/s eta 20s "
1182
+ ]
1183
+ },
1184
+ {
1185
+ "name": "stdout",
1186
+ "output_type": "stream",
1187
+ "text": [
1188
+ "\r",
1189
+ "-E0_01-split-2b.pth 22%[===> ] 228.36M 39.9MB/s eta 20s "
1190
+ ]
1191
+ },
1192
+ {
1193
+ "name": "stdout",
1194
+ "output_type": "stream",
1195
+ "text": [
1196
+ "\r",
1197
+ "E0_01-split-2b.pth 24%[===> ] 244.13M 40.3MB/s eta 20s "
1198
+ ]
1199
+ },
1200
+ {
1201
+ "name": "stdout",
1202
+ "output_type": "stream",
1203
+ "text": [
1204
+ "\r",
1205
+ "0_01-split-2b.pth 25%[====> ] 259.40M 40.4MB/s eta 18s "
1206
+ ]
1207
+ },
1208
+ {
1209
+ "name": "stdout",
1210
+ "output_type": "stream",
1211
+ "text": [
1212
+ "\r",
1213
+ "_01-split-2b.pth 26%[====> ] 274.14M 42.1MB/s eta 18s "
1214
+ ]
1215
+ },
1216
+ {
1217
+ "name": "stdout",
1218
+ "output_type": "stream",
1219
+ "text": [
1220
+ "\r",
1221
+ "01-split-2b.pth 27%[====> ] 274.66M 38.5MB/s eta 18s "
1222
+ ]
1223
+ },
1224
+ {
1225
+ "name": "stdout",
1226
+ "output_type": "stream",
1227
+ "text": [
1228
+ "\r",
1229
+ "1-split-2b.pth 28%[====> ] 289.92M 41.6MB/s eta 18s "
1230
+ ]
1231
+ },
1232
+ {
1233
+ "name": "stdout",
1234
+ "output_type": "stream",
1235
+ "text": [
1236
+ "\r",
1237
+ "-split-2b.pth 30%[=====> ] 305.18M 41.6MB/s eta 17s "
1238
+ ]
1239
+ },
1240
+ {
1241
+ "name": "stdout",
1242
+ "output_type": "stream",
1243
+ "text": [
1244
+ "\r",
1245
+ "split-2b.pth 31%[=====> ] 320.43M 40.9MB/s eta 17s "
1246
+ ]
1247
+ },
1248
+ {
1249
+ "name": "stdout",
1250
+ "output_type": "stream",
1251
+ "text": [
1252
+ "\r",
1253
+ "plit-2b.pth 32%[=====> ] 335.18M 41.2MB/s eta 17s "
1254
+ ]
1255
+ },
1256
+ {
1257
+ "name": "stdout",
1258
+ "output_type": "stream",
1259
+ "text": [
1260
+ "\r",
1261
+ "lit-2b.pth 33%[=====> ] 335.69M 38.5MB/s eta 17s "
1262
+ ]
1263
+ },
1264
+ {
1265
+ "name": "stdout",
1266
+ "output_type": "stream",
1267
+ "text": [
1268
+ "\r",
1269
+ "it-2b.pth 34%[=====> ] 350.95M 38.6MB/s eta 16s "
1270
+ ]
1271
+ },
1272
+ {
1273
+ "name": "stdout",
1274
+ "output_type": "stream",
1275
+ "text": [
1276
+ "\r",
1277
+ "t-2b.pth 35%[======> ] 365.70M 40.9MB/s eta 16s "
1278
+ ]
1279
+ },
1280
+ {
1281
+ "name": "stdout",
1282
+ "output_type": "stream",
1283
+ "text": [
1284
+ "\r",
1285
+ "-2b.pth 36%[======> ] 366.20M 38.0MB/s eta 16s "
1286
+ ]
1287
+ },
1288
+ {
1289
+ "name": "stdout",
1290
+ "output_type": "stream",
1291
+ "text": [
1292
+ "\r",
1293
+ "2b.pth 37%[======> ] 381.47M 38.8MB/s eta 16s "
1294
+ ]
1295
+ },
1296
+ {
1297
+ "name": "stdout",
1298
+ "output_type": "stream",
1299
+ "text": [
1300
+ "\r",
1301
+ "b.pth 37%[======> ] 385.65M 39.1MB/s eta 16s "
1302
+ ]
1303
+ },
1304
+ {
1305
+ "name": "stdout",
1306
+ "output_type": "stream",
1307
+ "text": [
1308
+ "\r",
1309
+ ".pth 39%[======> ] 396.73M 36.3MB/s eta 16s "
1310
+ ]
1311
+ },
1312
+ {
1313
+ "name": "stdout",
1314
+ "output_type": "stream",
1315
+ "text": [
1316
+ "\r",
1317
+ "pth 39%[======> ] 406.75M 37.5MB/s eta 16s "
1318
+ ]
1319
+ },
1320
+ {
1321
+ "name": "stdout",
1322
+ "output_type": "stream",
1323
+ "text": [
1324
+ "\r",
1325
+ "th 40%[=======> ] 411.99M 33.0MB/s eta 16s "
1326
+ ]
1327
+ },
1328
+ {
1329
+ "name": "stdout",
1330
+ "output_type": "stream",
1331
+ "text": [
1332
+ "\r",
1333
+ "h 42%[=======> ] 427.25M 33.6MB/s eta 15s "
1334
+ ]
1335
+ },
1336
+ {
1337
+ "name": "stdout",
1338
+ "output_type": "stream",
1339
+ "text": [
1340
+ "\r",
1341
+ " 43%[=======> ] 441.98M 32.7MB/s eta 15s "
1342
+ ]
1343
+ },
1344
+ {
1345
+ "name": "stdout",
1346
+ "output_type": "stream",
1347
+ "text": [
1348
+ "\r",
1349
+ " v 43%[=======> ] 442.51M 32.5MB/s eta 15s "
1350
+ ]
1351
+ },
1352
+ {
1353
+ "name": "stdout",
1354
+ "output_type": "stream",
1355
+ "text": [
1356
+ "\r",
1357
+ " v5 44%[=======> ] 457.25M 32.2MB/s eta 15s "
1358
+ ]
1359
+ },
1360
+ {
1361
+ "name": "stdout",
1362
+ "output_type": "stream",
1363
+ "text": [
1364
+ "\r",
1365
+ " v5- 45%[========> ] 457.76M 32.2MB/s eta 15s "
1366
+ ]
1367
+ },
1368
+ {
1369
+ "name": "stdout",
1370
+ "output_type": "stream",
1371
+ "text": [
1372
+ "\r",
1373
+ " v5-L 46%[========> ] 472.50M 31.5MB/s eta 14s "
1374
+ ]
1375
+ },
1376
+ {
1377
+ "name": "stdout",
1378
+ "output_type": "stream",
1379
+ "text": [
1380
+ "\r",
1381
+ " v5-L6 46%[========> ] 473.02M 31.5MB/s eta 14s "
1382
+ ]
1383
+ },
1384
+ {
1385
+ "name": "stdout",
1386
+ "output_type": "stream",
1387
+ "text": [
1388
+ "\r",
1389
+ " v5-L6- 48%[========> ] 488.28M 30.9MB/s eta 14s "
1390
+ ]
1391
+ },
1392
+ {
1393
+ "name": "stdout",
1394
+ "output_type": "stream",
1395
+ "text": [
1396
+ "\r",
1397
+ " v5-L6-D 49%[========> ] 503.54M 33.6MB/s eta 14s "
1398
+ ]
1399
+ },
1400
+ {
1401
+ "name": "stdout",
1402
+ "output_type": "stream",
1403
+ "text": [
1404
+ "\r",
1405
+ " v5-L6-D2 50%[=========> ] 518.29M 34.2MB/s eta 14s "
1406
+ ]
1407
+ },
1408
+ {
1409
+ "name": "stdout",
1410
+ "output_type": "stream",
1411
+ "text": [
1412
+ "\r",
1413
+ " v5-L6-D20 51%[=========> ] 518.80M 34.8MB/s eta 13s "
1414
+ ]
1415
+ },
1416
+ {
1417
+ "name": "stdout",
1418
+ "output_type": "stream",
1419
+ "text": [
1420
+ "\r",
1421
+ " v5-L6-D204 52%[=========> ] 534.05M 34.2MB/s eta 13s "
1422
+ ]
1423
+ },
1424
+ {
1425
+ "name": "stdout",
1426
+ "output_type": "stream",
1427
+ "text": [
1428
+ "\r",
1429
+ " v5-L6-D2048 54%[=========> ] 549.31M 37.5MB/s eta 13s "
1430
+ ]
1431
+ },
1432
+ {
1433
+ "name": "stdout",
1434
+ "output_type": "stream",
1435
+ "text": [
1436
+ "\r",
1437
+ " v5-L6-D2048- 55%[==========> ] 564.06M 37.7MB/s eta 13s "
1438
+ ]
1439
+ },
1440
+ {
1441
+ "name": "stdout",
1442
+ "output_type": "stream",
1443
+ "text": [
1444
+ "\r",
1445
+ " v5-L6-D2048-E 55%[==========> ] 565.78M 37.7MB/s eta 12s "
1446
+ ]
1447
+ },
1448
+ {
1449
+ "name": "stdout",
1450
+ "output_type": "stream",
1451
+ "text": [
1452
+ "\r",
1453
+ " v5-L6-D2048-E0 57%[==========> ] 579.83M 37.8MB/s eta 12s "
1454
+ ]
1455
+ },
1456
+ {
1457
+ "name": "stdout",
1458
+ "output_type": "stream",
1459
+ "text": [
1460
+ "\r",
1461
+ " v5-L6-D2048-E0_ 58%[==========> ] 595.09M 39.7MB/s eta 12s "
1462
+ ]
1463
+ },
1464
+ {
1465
+ "name": "stdout",
1466
+ "output_type": "stream",
1467
+ "text": [
1468
+ "\r",
1469
+ " v5-L6-D2048-E0_0 60%[===========> ] 610.35M 40.9MB/s eta 12s "
1470
+ ]
1471
+ },
1472
+ {
1473
+ "name": "stdout",
1474
+ "output_type": "stream",
1475
+ "text": [
1476
+ "\r",
1477
+ " v5-L6-D2048-E0_01 61%[===========> ] 625.47M 44.1MB/s eta 10s "
1478
+ ]
1479
+ },
1480
+ {
1481
+ "name": "stdout",
1482
+ "output_type": "stream",
1483
+ "text": [
1484
+ "\r",
1485
+ " v5-L6-D2048-E0_01- 61%[===========> ] 629.82M 42.6MB/s eta 10s "
1486
+ ]
1487
+ },
1488
+ {
1489
+ "name": "stdout",
1490
+ "output_type": "stream",
1491
+ "text": [
1492
+ "\r",
1493
+ "v5-L6-D2048-E0_01-s 63%[===========> ] 640.87M 42.6MB/s eta 10s "
1494
+ ]
1495
+ },
1496
+ {
1497
+ "name": "stdout",
1498
+ "output_type": "stream",
1499
+ "text": [
1500
+ "\r",
1501
+ "5-L6-D2048-E0_01-sp 64%[===========> ] 656.13M 45.6MB/s eta 10s "
1502
+ ]
1503
+ },
1504
+ {
1505
+ "name": "stdout",
1506
+ "output_type": "stream",
1507
+ "text": [
1508
+ "\r",
1509
+ "-L6-D2048-E0_01-spl 66%[============> ] 671.38M 45.8MB/s eta 9s "
1510
+ ]
1511
+ },
1512
+ {
1513
+ "name": "stdout",
1514
+ "output_type": "stream",
1515
+ "text": [
1516
+ "\r",
1517
+ "L6-D2048-E0_01-spli 67%[============> ] 686.64M 47.1MB/s eta 9s "
1518
+ ]
1519
+ },
1520
+ {
1521
+ "name": "stdout",
1522
+ "output_type": "stream",
1523
+ "text": [
1524
+ "\r",
1525
+ "6-D2048-E0_01-split 69%[============> ] 701.90M 47.0MB/s eta 9s "
1526
+ ]
1527
+ },
1528
+ {
1529
+ "name": "stdout",
1530
+ "output_type": "stream",
1531
+ "text": [
1532
+ "\r",
1533
+ "-D2048-E0_01-split- 70%[=============> ] 717.16M 46.9MB/s eta 9s "
1534
+ ]
1535
+ },
1536
+ {
1537
+ "name": "stdout",
1538
+ "output_type": "stream",
1539
+ "text": [
1540
+ "\r",
1541
+ "D2048-E0_01-split-2 71%[=============> ] 730.60M 47.8MB/s eta 7s "
1542
+ ]
1543
+ },
1544
+ {
1545
+ "name": "stdout",
1546
+ "output_type": "stream",
1547
+ "text": [
1548
+ "\r",
1549
+ "2048-E0_01-split-2b 73%[=============> ] 747.17M 45.9MB/s eta 7s "
1550
+ ]
1551
+ },
1552
+ {
1553
+ "name": "stdout",
1554
+ "output_type": "stream",
1555
+ "text": [
1556
+ "\r",
1557
+ "048-E0_01-split-2b. 74%[=============> ] 755.98M 45.7MB/s eta 7s "
1558
+ ]
1559
+ },
1560
+ {
1561
+ "name": "stdout",
1562
+ "output_type": "stream",
1563
+ "text": [
1564
+ "\r",
1565
+ "48-E0_01-split-2b.p 75%[==============> ] 762.94M 43.2MB/s eta 6s "
1566
+ ]
1567
+ },
1568
+ {
1569
+ "name": "stdout",
1570
+ "output_type": "stream",
1571
+ "text": [
1572
+ "\r",
1573
+ "8-E0_01-split-2b.pt 76%[==============> ] 777.68M 45.4MB/s eta 6s "
1574
+ ]
1575
+ },
1576
+ {
1577
+ "name": "stdout",
1578
+ "output_type": "stream",
1579
+ "text": [
1580
+ "\r",
1581
+ "-E0_01-split-2b.pth 76%[==============> ] 778.32M 42.7MB/s eta 6s "
1582
+ ]
1583
+ },
1584
+ {
1585
+ "name": "stdout",
1586
+ "output_type": "stream",
1587
+ "text": [
1588
+ "\r",
1589
+ "E0_01-split-2b.pth 78%[==============> ] 793.46M 42.0MB/s eta 6s "
1590
+ ]
1591
+ },
1592
+ {
1593
+ "name": "stdout",
1594
+ "output_type": "stream",
1595
+ "text": [
1596
+ "\r",
1597
+ "0_01-split-2b.pth 79%[==============> ] 808.20M 41.7MB/s eta 5s "
1598
+ ]
1599
+ },
1600
+ {
1601
+ "name": "stdout",
1602
+ "output_type": "stream",
1603
+ "text": [
1604
+ "\r",
1605
+ "_01-split-2b.pth 80%[===============> ] 814.09M 42.3MB/s eta 5s "
1606
+ ]
1607
+ },
1608
+ {
1609
+ "name": "stdout",
1610
+ "output_type": "stream",
1611
+ "text": [
1612
+ "\r",
1613
+ "01-split-2b.pth 80%[===============> ] 823.46M 41.0MB/s eta 5s "
1614
+ ]
1615
+ },
1616
+ {
1617
+ "name": "stdout",
1618
+ "output_type": "stream",
1619
+ "text": [
1620
+ "\r",
1621
+ "1-split-2b.pth 81%[===============> ] 823.97M 40.5MB/s eta 5s "
1622
+ ]
1623
+ },
1624
+ {
1625
+ "name": "stdout",
1626
+ "output_type": "stream",
1627
+ "text": [
1628
+ "\r",
1629
+ "-split-2b.pth 82%[===============> ] 838.71M 38.6MB/s eta 4s "
1630
+ ]
1631
+ },
1632
+ {
1633
+ "name": "stdout",
1634
+ "output_type": "stream",
1635
+ "text": [
1636
+ "\r",
1637
+ "split-2b.pth 83%[===============> ] 853.98M 41.4MB/s eta 4s "
1638
+ ]
1639
+ },
1640
+ {
1641
+ "name": "stdout",
1642
+ "output_type": "stream",
1643
+ "text": [
1644
+ "\r",
1645
+ "plit-2b.pth 84%[===============> ] 854.61M 38.0MB/s eta 4s "
1646
+ ]
1647
+ },
1648
+ {
1649
+ "name": "stdout",
1650
+ "output_type": "stream",
1651
+ "text": [
1652
+ "\r",
1653
+ "lit-2b.pth 85%[================> ] 869.24M 35.7MB/s eta 4s "
1654
+ ]
1655
+ },
1656
+ {
1657
+ "name": "stdout",
1658
+ "output_type": "stream",
1659
+ "text": [
1660
+ "\r",
1661
+ "it-2b.pth 85%[================> ] 869.75M 35.3MB/s eta 4s "
1662
+ ]
1663
+ },
1664
+ {
1665
+ "name": "stdout",
1666
+ "output_type": "stream",
1667
+ "text": [
1668
+ "\r",
1669
+ "t-2b.pth 86%[================> ] 875.74M 34.3MB/s eta 4s "
1670
+ ]
1671
+ },
1672
+ {
1673
+ "name": "stdout",
1674
+ "output_type": "stream",
1675
+ "text": [
1676
+ "\r",
1677
+ "-2b.pth 87%[================> ] 885.01M 32.5MB/s eta 4s "
1678
+ ]
1679
+ },
1680
+ {
1681
+ "name": "stdout",
1682
+ "output_type": "stream",
1683
+ "text": [
1684
+ "\r",
1685
+ "2b.pth 88%[================> ] 900.27M 33.8MB/s eta 4s "
1686
+ ]
1687
+ },
1688
+ {
1689
+ "name": "stdout",
1690
+ "output_type": "stream",
1691
+ "text": [
1692
+ "\r",
1693
+ "b.pth 89%[================> ] 913.70M 34.5MB/s eta 3s "
1694
+ ]
1695
+ },
1696
+ {
1697
+ "name": "stdout",
1698
+ "output_type": "stream",
1699
+ "text": [
1700
+ "\r",
1701
+ ".pth 90%[=================> ] 924.21M 34.8MB/s eta 3s "
1702
+ ]
1703
+ },
1704
+ {
1705
+ "name": "stdout",
1706
+ "output_type": "stream",
1707
+ "text": [
1708
+ "\r",
1709
+ "pth 91%[=================> ] 930.27M 35.3MB/s eta 3s "
1710
+ ]
1711
+ },
1712
+ {
1713
+ "name": "stdout",
1714
+ "output_type": "stream",
1715
+ "text": [
1716
+ "\r",
1717
+ "th 93%[=================> ] 946.04M 34.9MB/s eta 3s "
1718
+ ]
1719
+ },
1720
+ {
1721
+ "name": "stdout",
1722
+ "output_type": "stream",
1723
+ "text": [
1724
+ "\r",
1725
+ "h 94%[=================> ] 961.30M 37.2MB/s eta 1s "
1726
+ ]
1727
+ },
1728
+ {
1729
+ "name": "stdout",
1730
+ "output_type": "stream",
1731
+ "text": [
1732
+ "\r",
1733
+ " 95%[==================> ] 970.14M 35.7MB/s eta 1s "
1734
+ ]
1735
+ },
1736
+ {
1737
+ "name": "stdout",
1738
+ "output_type": "stream",
1739
+ "text": [
1740
+ "\r",
1741
+ " v 96%[==================> ] 976.55M 34.9MB/s eta 1s "
1742
+ ]
1743
+ },
1744
+ {
1745
+ "name": "stdout",
1746
+ "output_type": "stream",
1747
+ "text": [
1748
+ "\r",
1749
+ " v5 97%[==================> ] 991.82M 37.0MB/s eta 1s "
1750
+ ]
1751
+ },
1752
+ {
1753
+ "name": "stdout",
1754
+ "output_type": "stream",
1755
+ "text": [
1756
+ "\r",
1757
+ " v5- 98%[==================> ] 998.13M 35.6MB/s eta 1s "
1758
+ ]
1759
+ },
1760
+ {
1761
+ "name": "stdout",
1762
+ "output_type": "stream",
1763
+ "text": [
1764
+ "\r",
1765
+ " v5-L 98%[==================> ] 1007M 37.2MB/s eta 0s "
1766
  ]
1767
  },
1768
  {
1769
  "name": "stdout",
1770
  "output_type": "stream",
1771
  "text": [
 
 
 
 
1772
  "\r",
1773
+ " v5-L6 99%[==================> ] 1016M 34.9MB/s eta 0s \r",
1774
+ "v5-L6-D2048-E0_01-s 100%[===================>] 1017M 35.1MB/s in 26s \r\n",
1775
  "\r\n",
1776
+ "2023-10-11 08:03:19 (38.9 MB/s) - ‘v5-L6-D2048-E0_01-split-2b.pth’ saved [1066536657/1066536657]\r\n",
1777
  "\r\n"
1778
  ]
1779
  }
1780
  ],
1781
  "source": [
1782
  "# Get the init split model, and finetune from there\n",
1783
+ "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2a.pth\"\n",
1784
+ "!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/0600b94a58219f658326b4792ef5cd020e9d1a43/experiment/rwkv-x-exp/multi-size-train/v5-L6-D2048-E0_01-split-2b.pth\""
1785
  ]
1786
  },
1787
  {
1788
  "cell_type": "code",
1789
  "execution_count": 4,
1790
+ "id": "2a3cd2d1",
1791
  "metadata": {
1792
  "execution": {
1793
+ "iopub.execute_input": "2023-10-11T08:03:19.666619Z",
1794
+ "iopub.status.busy": "2023-10-11T08:03:19.665958Z",
1795
+ "iopub.status.idle": "2023-10-11T08:03:29.305787Z",
1796
+ "shell.execute_reply": "2023-10-11T08:03:29.304873Z"
1797
  },
1798
  "papermill": {
1799
+ "duration": 9.658186,
1800
+ "end_time": "2023-10-11T08:03:29.308744",
1801
  "exception": false,
1802
+ "start_time": "2023-10-11T08:03:19.650558",
1803
  "status": "completed"
1804
  },
1805
  "tags": []
 
1818
  "output_type": "stream",
1819
  "text": [
1820
  "\r",
1821
+ "Saving the dataset (0/2 shards): 7%| | 2000/27200 [00:00<00:01, 16356.85 examp"
1822
  ]
1823
  },
1824
  {
 
1826
  "output_type": "stream",
1827
  "text": [
1828
  "\r",
1829
+ "Saving the dataset (0/2 shards): 15%|▏| 4000/27200 [00:00<00:01, 17283.77 examp"
1830
  ]
1831
  },
1832
  {
 
1834
  "output_type": "stream",
1835
  "text": [
1836
  "\r",
1837
+ "Saving the dataset (0/2 shards): 22%|▏| 6000/27200 [00:00<00:01, 17873.97 examp"
1838
  ]
1839
  },
1840
  {
 
1842
  "output_type": "stream",
1843
  "text": [
1844
  "\r",
1845
+ "Saving the dataset (0/2 shards): 29%|▎| 8000/27200 [00:00<00:01, 18442.59 examp"
1846
  ]
1847
  },
1848
  {
 
1850
  "output_type": "stream",
1851
  "text": [
1852
  "\r",
1853
+ "Saving the dataset (0/2 shards): 40%|▍| 11000/27200 [00:00<00:00, 19135.78 exam"
1854
  ]
1855
  },
1856
  {
 
1858
  "output_type": "stream",
1859
  "text": [
1860
  "\r",
1861
+ "Saving the dataset (0/2 shards): 50%|▌| 13600/27200 [00:00<00:00, 19543.92 exam\r",
1862
+ "Saving the dataset (1/2 shards): 50%|▌| 13600/27200 [00:00<00:00, 19543.92 exam"
1863
  ]
1864
  },
1865
  {
 
1867
  "output_type": "stream",
1868
  "text": [
1869
  "\r",
1870
+ "Saving the dataset (1/2 shards): 65%|▋| 17600/27200 [00:00<00:00, 20515.42 exam"
1871
  ]
1872
  },
1873
  {
 
1875
  "output_type": "stream",
1876
  "text": [
1877
  "\r",
1878
+ "Saving the dataset (1/2 shards): 79%|▊| 21600/27200 [00:01<00:00, 21426.57 exam"
1879
  ]
1880
  },
1881
  {
 
1883
  "output_type": "stream",
1884
  "text": [
1885
  "\r",
1886
+ "Saving the dataset (1/2 shards): 94%|▉| 25600/27200 [00:01<00:00, 22078.81 exam"
1887
  ]
1888
  },
1889
  {
 
1891
  "output_type": "stream",
1892
  "text": [
1893
  "\r",
1894
+ "Saving the dataset (2/2 shards): 100%|█| 27200/27200 [00:01<00:00, 22078.81 exam\r",
1895
+ "Saving the dataset (2/2 shards): 100%|█| 27200/27200 [00:01<00:00, 20603.99 exam\r\n",
 
 
 
 
 
 
1896
  "\r",
1897
  "Saving the dataset (0/1 shards): 0%| | 0/109 [00:00<?, ? examples/s]\r",
1898
+ "Saving the dataset (1/1 shards): 100%|█| 109/109 [00:00<00:00, 8117.24 examples/\r",
1899
+ "Saving the dataset (1/1 shards): 100%|█| 109/109 [00:00<00:00, 7809.82 examples/\r\n"
1900
  ]
1901
  }
1902
  ],
 
1908
  },
1909
  {
1910
  "cell_type": "markdown",
1911
+ "id": "77d1d3e8",
1912
  "metadata": {
1913
  "papermill": {
1914
+ "duration": 0.016656,
1915
+ "end_time": "2023-10-11T08:03:29.342825",
1916
  "exception": false,
1917
+ "start_time": "2023-10-11T08:03:29.326169",
1918
  "status": "completed"
1919
  },
1920
  "tags": []
 
1926
  {
1927
  "cell_type": "code",
1928
  "execution_count": 5,
1929
+ "id": "42cb403e",
1930
  "metadata": {
1931
  "execution": {
1932
+ "iopub.execute_input": "2023-10-11T08:03:29.379159Z",
1933
+ "iopub.status.busy": "2023-10-11T08:03:29.378428Z",
1934
+ "iopub.status.idle": "2023-10-11T08:03:46.935627Z",
1935
+ "shell.execute_reply": "2023-10-11T08:03:46.934802Z"
1936
  },
1937
  "papermill": {
1938
+ "duration": 17.577903,
1939
+ "end_time": "2023-10-11T08:03:46.937715",
1940
  "exception": false,
1941
+ "start_time": "2023-10-11T08:03:29.359812",
1942
  "status": "completed"
1943
  },
1944
  "tags": []
 
1948
  "name": "stdout",
1949
  "output_type": "stream",
1950
  "text": [
1951
+ "[2023-10-11 08:03:33,838] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
1952
  ]
1953
  },
1954
  {
 
1970
  "name": "stdout",
1971
  "output_type": "stream",
1972
  "text": [
1973
+ "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1933922385\r\n",
1974
  " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
1975
+ "Global seed set to 1933922385\r\n"
1976
  ]
1977
  },
1978
  {
 
1987
  "output_type": "stream",
1988
  "text": [
1989
  "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n",
1990
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo\u001b[0m\r\n",
1991
  "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
1992
  "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
1993
  "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n",
1994
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n"
1995
  ]
1996
  },
1997
  {
 
2033
  "name": "stdout",
2034
  "output_type": "stream",
2035
  "text": [
2036
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion A3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/5696uouo\u001b[0m\r\n",
2037
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n",
2038
  "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
2039
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080337-5696uouo/logs\u001b[0m\r\n"
2040
  ]
2041
  }
2042
  ],
 
2058
  {
2059
  "cell_type": "code",
2060
  "execution_count": 6,
2061
+ "id": "53867c42",
2062
  "metadata": {
2063
  "execution": {
2064
+ "iopub.execute_input": "2023-10-11T08:03:46.969471Z",
2065
+ "iopub.status.busy": "2023-10-11T08:03:46.969019Z",
2066
+ "iopub.status.idle": "2023-10-11T08:03:50.682437Z",
2067
+ "shell.execute_reply": "2023-10-11T08:03:50.680986Z"
2068
  },
2069
  "papermill": {
2070
+ "duration": 3.732808,
2071
+ "end_time": "2023-10-11T08:03:50.685581",
2072
  "exception": false,
2073
+ "start_time": "2023-10-11T08:03:46.952773",
2074
  "status": "completed"
2075
  },
2076
  "tags": []
 
2080
  "name": "stdout",
2081
  "output_type": "stream",
2082
  "text": [
2083
+ "[2023-10-11 08:03:49,278] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
2084
  ]
2085
  },
2086
  {
 
2115
  {
2116
  "cell_type": "code",
2117
  "execution_count": 7,
2118
+ "id": "5688e577",
2119
  "metadata": {
2120
  "execution": {
2121
+ "iopub.execute_input": "2023-10-11T08:03:50.806267Z",
2122
+ "iopub.status.busy": "2023-10-11T08:03:50.804997Z",
2123
+ "iopub.status.idle": "2023-10-11T08:03:56.788036Z",
2124
+ "shell.execute_reply": "2023-10-11T08:03:56.786568Z"
2125
  },
2126
  "papermill": {
2127
+ "duration": 6.08675,
2128
+ "end_time": "2023-10-11T08:03:56.790510",
2129
  "exception": false,
2130
+ "start_time": "2023-10-11T08:03:50.703760",
2131
  "status": "completed"
2132
  },
2133
  "tags": []
 
2137
  "name": "stdout",
2138
  "output_type": "stream",
2139
  "text": [
2140
+ "[2023-10-11 08:03:54,934] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
2141
+ ]
2142
+ },
2143
+ {
2144
+ "name": "stdout",
2145
+ "output_type": "stream",
2146
+ "text": [
2147
+ "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
2148
  ]
2149
  },
2150
  {
2151
  "name": "stdout",
2152
  "output_type": "stream",
2153
  "text": [
 
2154
  "Traceback (most recent call last):\r\n",
2155
  " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/dragon_test.py\", line 52, in <module>\r\n",
2156
  " model = SimpleRWKV(MODEL_PATH, device=DEVICE)\r\n",
 
2170
  },
2171
  {
2172
  "cell_type": "markdown",
2173
+ "id": "b4927e87",
2174
  "metadata": {
2175
  "papermill": {
2176
+ "duration": 0.015295,
2177
+ "end_time": "2023-10-11T08:03:56.820640",
2178
  "exception": false,
2179
+ "start_time": "2023-10-11T08:03:56.805345",
2180
  "status": "completed"
2181
  },
2182
  "tags": []
 
2188
  {
2189
  "cell_type": "code",
2190
  "execution_count": 8,
2191
+ "id": "6bdd285a",
2192
  "metadata": {
2193
  "execution": {
2194
+ "iopub.execute_input": "2023-10-11T08:03:56.853495Z",
2195
+ "iopub.status.busy": "2023-10-11T08:03:56.852946Z",
2196
+ "iopub.status.idle": "2023-10-11T08:04:11.500794Z",
2197
+ "shell.execute_reply": "2023-10-11T08:04:11.499336Z"
2198
  },
2199
  "papermill": {
2200
+ "duration": 14.668001,
2201
+ "end_time": "2023-10-11T08:04:11.503644",
2202
  "exception": false,
2203
+ "start_time": "2023-10-11T08:03:56.835643",
2204
  "status": "completed"
2205
  },
2206
  "tags": []
 
2210
  "name": "stdout",
2211
  "output_type": "stream",
2212
  "text": [
2213
+ "[2023-10-11 08:04:01,096] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
2214
  ]
2215
  },
2216
  {
 
2232
  "name": "stdout",
2233
  "output_type": "stream",
2234
  "text": [
2235
+ "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1732922148\r\n",
2236
  " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
2237
+ "Global seed set to 1732922148\r\n"
2238
  ]
2239
  },
2240
  {
 
2249
  "output_type": "stream",
2250
  "text": [
2251
  "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.12\r\n",
2252
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j\u001b[0m\r\n",
2253
  "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
2254
  "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m\r\n",
2255
  "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments\u001b[0m\r\n",
2256
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n"
2257
  ]
2258
  },
2259
  {
 
2295
  "name": "stdout",
2296
  "output_type": "stream",
2297
  "text": [
2298
+ "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m[Multi-size] v5-L6+6-D2048-E0.01 - layer-expansion B3 (train-ctx=4k, deepspeed_stage_2_offload)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/runs/88lcuk7j\u001b[0m\r\n",
2299
+ "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-5X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjk0OTk4MDcy/version_details/v16\u001b[0m\r\n",
2300
  "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\r\n",
2301
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20231011_080403-88lcuk7j/logs\u001b[0m\r\n"
2302
  ]
2303
  }
2304
  ],
 
2320
  {
2321
  "cell_type": "code",
2322
  "execution_count": 9,
2323
+ "id": "ae4623a1",
2324
  "metadata": {
2325
  "execution": {
2326
+ "iopub.execute_input": "2023-10-11T08:04:11.546046Z",
2327
+ "iopub.status.busy": "2023-10-11T08:04:11.544870Z",
2328
+ "iopub.status.idle": "2023-10-11T08:04:15.274349Z",
2329
+ "shell.execute_reply": "2023-10-11T08:04:15.272957Z"
2330
  },
2331
  "papermill": {
2332
+ "duration": 3.754115,
2333
+ "end_time": "2023-10-11T08:04:15.277163",
2334
  "exception": false,
2335
+ "start_time": "2023-10-11T08:04:11.523048",
2336
  "status": "completed"
2337
  },
2338
  "tags": []
 
2342
  "name": "stdout",
2343
  "output_type": "stream",
2344
  "text": [
2345
+ "[2023-10-11 08:04:13,869] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
2346
  ]
2347
  },
2348
  {
 
2377
  {
2378
  "cell_type": "code",
2379
  "execution_count": 10,
2380
+ "id": "8e1b1152",
2381
  "metadata": {
2382
  "execution": {
2383
+ "iopub.execute_input": "2023-10-11T08:04:15.319747Z",
2384
+ "iopub.status.busy": "2023-10-11T08:04:15.318636Z",
2385
+ "iopub.status.idle": "2023-10-11T08:04:21.268526Z",
2386
+ "shell.execute_reply": "2023-10-11T08:04:21.267073Z"
2387
  },
2388
  "papermill": {
2389
+ "duration": 5.974644,
2390
+ "end_time": "2023-10-11T08:04:21.271495",
2391
  "exception": false,
2392
+ "start_time": "2023-10-11T08:04:15.296851",
2393
  "status": "completed"
2394
  },
2395
  "tags": []
 
2399
  "name": "stdout",
2400
  "output_type": "stream",
2401
  "text": [
2402
+ "[2023-10-11 08:04:19,430] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
2403
  ]
2404
  },
2405
  {
 
2445
  },
2446
  "papermill": {
2447
  "default_parameters": {},
2448
+ "duration": 119.315066,
2449
+ "end_time": "2023-10-11T08:04:21.714050",
2450
  "environment_variables": {},
2451
  "exception": null,
2452
  "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb",
2453
  "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/multi-size-train/v5-L6+6-D2048-layer-baseline.ipynb",
2454
  "parameters": {},
2455
+ "start_time": "2023-10-11T08:02:22.398984",
2456
  "version": "2.4.0"
2457
  }
2458
  },