picocreator committed on
Commit
44c409b
1 Parent(s): 8e80cec

[GHA] experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb result notebook & reports (fallback single file upload)

Browse files
.gitattributes CHANGED
@@ -84,3 +84,4 @@ experiment/memory-bench/logs/v5-L96-D1024-E0_1-mem-ctx-8k-4k.csv filter=lfs diff
84
  experiment/rwkv-x-exp/v5-slim-memory/v5-L6-D1024-E1e-1-ctx4k.ipynb filter=lfs diff=lfs merge=lfs -text
85
  experiment/memory-bench/logs/v5-L6-D1024-E0_1-4k.csv filter=lfs diff=lfs merge=lfs -text
86
  experiment/memory-bench/logs/v5-L6-D1024-E0_1-16k.csv filter=lfs diff=lfs merge=lfs -text
 
 
84
  experiment/rwkv-x-exp/v5-slim-memory/v5-L6-D1024-E1e-1-ctx4k.ipynb filter=lfs diff=lfs merge=lfs -text
85
  experiment/memory-bench/logs/v5-L6-D1024-E0_1-4k.csv filter=lfs diff=lfs merge=lfs -text
86
  experiment/memory-bench/logs/v5-L6-D1024-E0_1-16k.csv filter=lfs diff=lfs merge=lfs -text
87
+ experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb filter=lfs diff=lfs merge=lfs -text
experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb CHANGED
@@ -1,611 +1,3 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "id": "d16d19a7",
7
- "metadata": {
8
- "papermill": {
9
- "duration": 0.004251,
10
- "end_time": "2023-09-06T01:53:47.272181",
11
- "exception": false,
12
- "start_time": "2023-09-06T01:53:47.267930",
13
- "status": "completed"
14
- },
15
- "tags": []
16
- },
17
- "source": [
18
- "# RWKV v5\n",
19
- "\n",
20
- "Simple memory training for a small model\n",
21
- "\n",
22
- "**Note:** This project assumes you have the rwkv-infctx conda env setup"
23
- ]
24
- },
25
- {
26
- "attachments": {},
27
- "cell_type": "markdown",
28
- "id": "e7eaa6b5",
29
- "metadata": {
30
- "papermill": {
31
- "duration": 0.002635,
32
- "end_time": "2023-09-06T01:53:47.277736",
33
- "exception": false,
34
- "start_time": "2023-09-06T01:53:47.275101",
35
- "status": "completed"
36
- },
37
- "tags": []
38
- },
39
- "source": [
40
- "# Basic Setup"
41
- ]
42
- },
43
- {
44
- "cell_type": "code",
45
- "execution_count": 1,
46
- "id": "43c42c12",
47
- "metadata": {
48
- "execution": {
49
- "iopub.execute_input": "2023-09-06T01:53:47.281975Z",
50
- "iopub.status.busy": "2023-09-06T01:53:47.281689Z",
51
- "iopub.status.idle": "2023-09-06T01:53:48.160757Z",
52
- "shell.execute_reply": "2023-09-06T01:53:48.159854Z"
53
- },
54
- "papermill": {
55
- "duration": 0.883492,
56
- "end_time": "2023-09-06T01:53:48.162633",
57
- "exception": false,
58
- "start_time": "2023-09-06T01:53:47.279141",
59
- "status": "completed"
60
- },
61
- "tags": []
62
- },
63
- "outputs": [
64
- {
65
- "name": "stdout",
66
- "output_type": "stream",
67
- "text": [
68
- "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize32 checkpoint\tnotebook\r\n",
69
- "LICENSE RWKV-v5\t\t RWKV-v5r2\t datapath\toutput\r\n",
70
- "README.md RWKV-v5altwavenet RWKV-v5rstack\t docker\r\n",
71
- "RWKV-v4neo RWKV-v5headsize2x RWKV-v5wavenet model\r\n"
72
- ]
73
- }
74
- ],
75
- "source": [
76
- "# First lets setup the various directories, and init the model\n",
77
- "!ls ../../../../../\n",
78
- "!mkdir -p ../../../../../model/\n",
79
- "!mkdir -p ../../../../../datapath/\n",
80
- "!mkdir -p ../../../../../checkpoint/"
81
- ]
82
- },
83
- {
84
- "cell_type": "code",
85
- "execution_count": 2,
86
- "id": "4f145f3c",
87
- "metadata": {
88
- "execution": {
89
- "iopub.execute_input": "2023-09-06T01:53:48.169937Z",
90
- "iopub.status.busy": "2023-09-06T01:53:48.169610Z",
91
- "iopub.status.idle": "2023-09-06T01:53:50.281603Z",
92
- "shell.execute_reply": "2023-09-06T01:53:50.280695Z"
93
- },
94
- "papermill": {
95
- "duration": 2.117825,
96
- "end_time": "2023-09-06T01:53:50.283651",
97
- "exception": false,
98
- "start_time": "2023-09-06T01:53:48.165826",
99
- "status": "completed"
100
- },
101
- "tags": []
102
- },
103
- "outputs": [
104
- {
105
- "name": "stdout",
106
- "output_type": "stream",
107
- "text": [
108
- "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n",
109
- "\u001b[0m"
110
- ]
111
- }
112
- ],
113
- "source": [
114
- "# Additional dependencies for eval stuff\n",
115
- "!pip install -q aiocsv aiofiles"
116
- ]
117
- },
118
- {
119
- "cell_type": "code",
120
- "execution_count": 3,
121
- "id": "e478c7e4",
122
- "metadata": {
123
- "execution": {
124
- "iopub.execute_input": "2023-09-06T01:53:50.291201Z",
125
- "iopub.status.busy": "2023-09-06T01:53:50.290951Z",
126
- "iopub.status.idle": "2023-09-06T01:53:50.300137Z",
127
- "shell.execute_reply": "2023-09-06T01:53:50.299422Z"
128
- },
129
- "papermill": {
130
- "duration": 0.014857,
131
- "end_time": "2023-09-06T01:53:50.301805",
132
- "exception": false,
133
- "start_time": "2023-09-06T01:53:50.286948",
134
- "status": "completed"
135
- },
136
- "tags": []
137
- },
138
- "outputs": [
139
- {
140
- "name": "stdout",
141
- "output_type": "stream",
142
- "text": [
143
- "DEEPSPEED_STRAT: deepspeed_stage_1\n",
144
- "ENABLE_WANDB: True\n",
145
- "GPU_DEVICES: auto\n",
146
- "DIR_NAME: L12-D2048-E1e-1-ctx4k\n",
147
- "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k\n",
148
- "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
149
- "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
150
- "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
151
- ]
152
- }
153
- ],
154
- "source": [
155
- "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
156
- "GPU_DEVICES=\"auto\"\n",
157
- "ENABLE_WANDB=True\n",
158
- "\n",
159
- "# Layer count and embed dim to start with\n",
160
- "LAYER_COUNT=12\n",
161
- "EMBED_DIM=2048\n",
162
- "\n",
163
- "EMBED_SCALE=0.1\n",
164
- "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
165
- "\n",
166
- "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
167
- "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
168
- "\n",
169
- "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
170
- "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
171
- "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
172
- "\n",
173
- "if ENABLE_WANDB:\n",
174
- " WANDB_MODE=\"online\"\n",
175
- "else:\n",
176
- " WANDB_MODE=\"disabled\"\n",
177
- "\n",
178
- "# Computing the notebook, and various paths\n",
179
- "import os\n",
180
- "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
181
- "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n",
182
- "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n",
183
- "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
184
- "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
185
- "\n",
186
- "# Get the notebook dir name\n",
187
- "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n",
188
- "\n",
189
- "# Log names and dir\n",
190
- "print(\"DIR_NAME:\", DIR_NAME)\n",
191
- "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
192
- "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
193
- "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
194
- "print(\"PROJECT_DIR:\", PROJECT_DIR)"
195
- ]
196
- },
197
- {
198
- "cell_type": "code",
199
- "execution_count": 4,
200
- "id": "c0d1081e",
201
- "metadata": {
202
- "execution": {
203
- "iopub.execute_input": "2023-09-06T01:53:50.309388Z",
204
- "iopub.status.busy": "2023-09-06T01:53:50.309130Z",
205
- "iopub.status.idle": "2023-09-06T01:53:50.530630Z",
206
- "shell.execute_reply": "2023-09-06T01:53:50.529804Z"
207
- },
208
- "papermill": {
209
- "duration": 0.227251,
210
- "end_time": "2023-09-06T01:53:50.532377",
211
- "exception": false,
212
- "start_time": "2023-09-06T01:53:50.305126",
213
- "status": "completed"
214
- },
215
- "tags": []
216
- },
217
- "outputs": [
218
- {
219
- "name": "stdout",
220
- "output_type": "stream",
221
- "text": [
222
- "/usr/bin/sh: 1: cd: can't cd to {TRAINER_DIR}\r\n"
223
- ]
224
- }
225
- ],
226
- "source": [
227
- "# Init the model\n",
228
- "!cd \"{TRAINER_DIR}\" && \\\n",
229
- " export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
230
- " python3 ./init_model.py \\\n",
231
- " --n_layer \"{LAYER_COUNT}\" --n_embd \"{EMBED_DIM}\" \\\n",
232
- " --emb-scale \"{EMBED_SCALE}\" \\\n",
233
- " --vocab_size neox --skip-if-exists \\\n",
234
- " \"../model/{FILENAME_PREFIX}-neox-init.pth\""
235
- ]
236
- },
237
- {
238
- "cell_type": "markdown",
239
- "id": "ddc1086b",
240
- "metadata": {
241
- "papermill": {
242
- "duration": 0.003123,
243
- "end_time": "2023-09-06T01:53:50.538962",
244
- "exception": false,
245
- "start_time": "2023-09-06T01:53:50.535839",
246
- "status": "completed"
247
- },
248
- "tags": []
249
- },
250
- "source": [
251
- "## Enwiki Stage 1 : Foundation 4k model training"
252
- ]
253
- },
254
- {
255
- "cell_type": "code",
256
- "execution_count": 5,
257
- "id": "636df5aa",
258
- "metadata": {
259
- "execution": {
260
- "iopub.execute_input": "2023-09-06T01:53:50.544775Z",
261
- "iopub.status.busy": "2023-09-06T01:53:50.544524Z",
262
- "iopub.status.idle": "2023-09-06T01:53:59.821234Z",
263
- "shell.execute_reply": "2023-09-06T01:53:59.820428Z"
264
- },
265
- "papermill": {
266
- "duration": 9.281152,
267
- "end_time": "2023-09-06T01:53:59.823158",
268
- "exception": false,
269
- "start_time": "2023-09-06T01:53:50.542006",
270
- "status": "completed"
271
- },
272
- "tags": []
273
- },
274
- "outputs": [
275
- {
276
- "name": "stdout",
277
- "output_type": "stream",
278
- "text": [
279
- "\r",
280
- "Saving the dataset (0/5 shards): 0%| | 0/81505 [00:00<?, ? examples/s]"
281
- ]
282
- },
283
- {
284
- "name": "stdout",
285
- "output_type": "stream",
286
- "text": [
287
- "\r",
288
- "Saving the dataset (0/5 shards): 6%| | 5000/81505 [00:00<00:01, 40020.30 examp"
289
- ]
290
- },
291
- {
292
- "name": "stdout",
293
- "output_type": "stream",
294
- "text": [
295
- "\r",
296
- "Saving the dataset (0/5 shards): 13%|▏| 11000/81505 [00:00<00:01, 43937.85 exam"
297
- ]
298
- },
299
- {
300
- "name": "stdout",
301
- "output_type": "stream",
302
- "text": [
303
- "\r",
304
- "Saving the dataset (0/5 shards): 20%|▏| 16301/81505 [00:00<00:01, 46422.21 exam\r",
305
- "Saving the dataset (1/5 shards): 20%|▏| 16301/81505 [00:00<00:01, 46422.21 exam"
306
- ]
307
- },
308
- {
309
- "name": "stdout",
310
- "output_type": "stream",
311
- "text": [
312
- "\r",
313
- "Saving the dataset (1/5 shards): 27%|▎| 22301/81505 [00:00<00:01, 48661.83 exam"
314
- ]
315
- },
316
- {
317
- "name": "stdout",
318
- "output_type": "stream",
319
- "text": [
320
- "\r",
321
- "Saving the dataset (1/5 shards): 35%|▎| 28301/81505 [00:00<00:01, 50444.99 exam"
322
- ]
323
- },
324
- {
325
- "name": "stdout",
326
- "output_type": "stream",
327
- "text": [
328
- "\r",
329
- "Saving the dataset (2/5 shards): 40%|▍| 32602/81505 [00:00<00:00, 50444.99 exam"
330
- ]
331
- },
332
- {
333
- "name": "stdout",
334
- "output_type": "stream",
335
- "text": [
336
- "\r",
337
- "Saving the dataset (2/5 shards): 44%|▍| 35602/81505 [00:00<00:00, 51589.83 exam"
338
- ]
339
- },
340
- {
341
- "name": "stdout",
342
- "output_type": "stream",
343
- "text": [
344
- "\r",
345
- "Saving the dataset (2/5 shards): 51%|▌| 41602/81505 [00:00<00:00, 45740.30 exam"
346
- ]
347
- },
348
- {
349
- "name": "stdout",
350
- "output_type": "stream",
351
- "text": [
352
- "\r",
353
- "Saving the dataset (2/5 shards): 58%|▌| 47602/81505 [00:01<00:00, 46874.25 exam\r",
354
- "Saving the dataset (3/5 shards): 60%|▌| 48903/81505 [00:01<00:00, 46874.25 exam"
355
- ]
356
- },
357
- {
358
- "name": "stdout",
359
- "output_type": "stream",
360
- "text": [
361
- "\r",
362
- "Saving the dataset (3/5 shards): 67%|▋| 54903/81505 [00:01<00:00, 48825.20 exam"
363
- ]
364
- },
365
- {
366
- "name": "stdout",
367
- "output_type": "stream",
368
- "text": [
369
- "\r",
370
- "Saving the dataset (3/5 shards): 75%|▋| 60903/81505 [00:01<00:00, 50738.43 exam"
371
- ]
372
- },
373
- {
374
- "name": "stdout",
375
- "output_type": "stream",
376
- "text": [
377
- "\r",
378
- "Saving the dataset (4/5 shards): 80%|▊| 65204/81505 [00:01<00:00, 50738.43 exam"
379
- ]
380
- },
381
- {
382
- "name": "stdout",
383
- "output_type": "stream",
384
- "text": [
385
- "\r",
386
- "Saving the dataset (4/5 shards): 84%|▊| 68204/81505 [00:01<00:00, 51451.93 exam"
387
- ]
388
- },
389
- {
390
- "name": "stdout",
391
- "output_type": "stream",
392
- "text": [
393
- "\r",
394
- "Saving the dataset (4/5 shards): 92%|▉| 75204/81505 [00:01<00:00, 44264.66 exam"
395
- ]
396
- },
397
- {
398
- "name": "stdout",
399
- "output_type": "stream",
400
- "text": [
401
- "\r",
402
- "Saving the dataset (4/5 shards): 100%|▉| 81204/81505 [00:01<00:00, 45739.71 exam\r",
403
- "Saving the dataset (5/5 shards): 100%|█| 81505/81505 [00:01<00:00, 45739.71 exam\r",
404
- "Saving the dataset (5/5 shards): 100%|█| 81505/81505 [00:01<00:00, 47312.07 exam\r\n",
405
- "\r",
406
- "Saving the dataset (0/1 shards): 0%| | 0/410 [00:00<?, ? examples/s]\r",
407
- "Saving the dataset (1/1 shards): 100%|█| 410/410 [00:00<00:00, 40194.11 examples\r",
408
- "Saving the dataset (1/1 shards): 100%|█| 410/410 [00:00<00:00, 38725.95 examples\r\n"
409
- ]
410
- }
411
- ],
412
- "source": [
413
- "# Lets preload the requried dataset \n",
414
- "!cd \"{TRAINER_DIR}\" && \\\n",
415
- " python3 preload_datapath.py \"{CONFIG_DIR}/config-enwiki-4k.yaml\""
416
- ]
417
- },
418
- {
419
- "cell_type": "code",
420
- "execution_count": 6,
421
- "id": "14045bce",
422
- "metadata": {
423
- "execution": {
424
- "iopub.execute_input": "2023-09-06T01:53:59.833777Z",
425
- "iopub.status.busy": "2023-09-06T01:53:59.833515Z",
426
- "iopub.status.idle": "2023-09-06T01:54:00.054878Z",
427
- "shell.execute_reply": "2023-09-06T01:54:00.054138Z"
428
- },
429
- "papermill": {
430
- "duration": 0.228539,
431
- "end_time": "2023-09-06T01:54:00.056582",
432
- "exception": false,
433
- "start_time": "2023-09-06T01:53:59.828043",
434
- "status": "completed"
435
- },
436
- "tags": []
437
- },
438
- "outputs": [
439
- {
440
- "name": "stdout",
441
- "output_type": "stream",
442
- "text": [
443
- "/usr/bin/sh: 1: cd: can't cd to {TRAINER_DIR}\r\n"
444
- ]
445
- }
446
- ],
447
- "source": [
448
- "# Start the foundation model training\n",
449
- "!cd \"{TRAINER_DIR}\" && \\\n",
450
- " export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
451
- " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
452
- " python lightning_trainer.py fit \\\n",
453
- " -c \"{CONFIG_DIR}/config-enwiki-4k.yaml\" \\\n",
454
- " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Foundation (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
455
- " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
456
- " --trainer.devices=\"{GPU_DEVICES}\" \\\n",
457
- " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/\" \\\n",
458
- " --model.load_model=\"../model/{FILENAME_PREFIX}-neox-init.pth\" \\\n",
459
- " --model.ctx_len=4096 \\\n",
460
- " --model.bptt_learning_range=1"
461
- ]
462
- },
463
- {
464
- "cell_type": "code",
465
- "execution_count": 7,
466
- "id": "498791c4",
467
- "metadata": {
468
- "execution": {
469
- "iopub.execute_input": "2023-09-06T01:54:00.067029Z",
470
- "iopub.status.busy": "2023-09-06T01:54:00.066776Z",
471
- "iopub.status.idle": "2023-09-06T01:54:00.505182Z",
472
- "shell.execute_reply": "2023-09-06T01:54:00.504440Z"
473
- },
474
- "papermill": {
475
- "duration": 0.445543,
476
- "end_time": "2023-09-06T01:54:00.506877",
477
- "exception": false,
478
- "start_time": "2023-09-06T01:54:00.061334",
479
- "status": "completed"
480
- },
481
- "tags": []
482
- },
483
- "outputs": [
484
- {
485
- "name": "stdout",
486
- "output_type": "stream",
487
- "text": [
488
- "/usr/bin/sh: 1: python: not found\r\n"
489
- ]
490
- },
491
- {
492
- "name": "stdout",
493
- "output_type": "stream",
494
- "text": [
495
- "ls: cannot access '../model/v5r3-L12-D2048-E0_1-enwiki-4k.pth': No such file or directory\r\n"
496
- ]
497
- }
498
- ],
499
- "source": [
500
- "# Lets export the model from the checkpoint\n",
501
- "!cd \"{TRAINER_DIR}\" && \\\n",
502
- " python export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/last.ckpt\" \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"bf16\"\n",
503
- "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\""
504
- ]
505
- },
506
- {
507
- "cell_type": "code",
508
- "execution_count": 8,
509
- "id": "eeae81c5",
510
- "metadata": {
511
- "execution": {
512
- "iopub.execute_input": "2023-09-06T01:54:00.517808Z",
513
- "iopub.status.busy": "2023-09-06T01:54:00.517561Z",
514
- "iopub.status.idle": "2023-09-06T01:54:00.734217Z",
515
- "shell.execute_reply": "2023-09-06T01:54:00.733477Z"
516
- },
517
- "papermill": {
518
- "duration": 0.223994,
519
- "end_time": "2023-09-06T01:54:00.735863",
520
- "exception": false,
521
- "start_time": "2023-09-06T01:54:00.511869",
522
- "status": "completed"
523
- },
524
- "tags": []
525
- },
526
- "outputs": [
527
- {
528
- "name": "stdout",
529
- "output_type": "stream",
530
- "text": [
531
- "/usr/bin/sh: 1: cd: can't cd to {INFERENCE_DIR}\r\n"
532
- ]
533
- }
534
- ],
535
- "source": [
536
- "# # Lets do a quick dragon prompt validation\n",
537
- "!cd \"{INFERENCE_DIR}\" && \\\n",
538
- " export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
539
- " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"cuda fp32\""
540
- ]
541
- },
542
- {
543
- "cell_type": "code",
544
- "execution_count": 9,
545
- "id": "e7fcd1dc",
546
- "metadata": {
547
- "execution": {
548
- "iopub.execute_input": "2023-09-06T01:54:00.747330Z",
549
- "iopub.status.busy": "2023-09-06T01:54:00.746699Z",
550
- "iopub.status.idle": "2023-09-06T01:54:00.976416Z",
551
- "shell.execute_reply": "2023-09-06T01:54:00.975671Z"
552
- },
553
- "papermill": {
554
- "duration": 0.237171,
555
- "end_time": "2023-09-06T01:54:00.978059",
556
- "exception": false,
557
- "start_time": "2023-09-06T01:54:00.740888",
558
- "status": "completed"
559
- },
560
- "tags": []
561
- },
562
- "outputs": [
563
- {
564
- "name": "stdout",
565
- "output_type": "stream",
566
- "text": [
567
- "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n"
568
- ]
569
- }
570
- ],
571
- "source": [
572
- "# Lets do a quick memory test\n",
573
- "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
574
- " python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-enwiki-4k.pth\""
575
- ]
576
- }
577
- ],
578
- "metadata": {
579
- "kernelspec": {
580
- "display_name": "Python 3 (ipykernel)",
581
- "language": "python",
582
- "name": "python3"
583
- },
584
- "language_info": {
585
- "codemirror_mode": {
586
- "name": "ipython",
587
- "version": 3
588
- },
589
- "file_extension": ".py",
590
- "mimetype": "text/x-python",
591
- "name": "python",
592
- "nbconvert_exporter": "python",
593
- "pygments_lexer": "ipython3",
594
- "version": "3.10.12"
595
- },
596
- "papermill": {
597
- "default_parameters": {},
598
- "duration": 14.829906,
599
- "end_time": "2023-09-06T01:54:01.098909",
600
- "environment_variables": {},
601
- "exception": null,
602
- "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb",
603
- "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part1.ipynb",
604
- "parameters": {},
605
- "start_time": "2023-09-06T01:53:46.269003",
606
- "version": "2.4.0"
607
- }
608
- },
609
- "nbformat": 4,
610
- "nbformat_minor": 5
611
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aeaa7a18c512ab269373405a851820ebf829612158ad8c004e3b12edf85594b
3
+ size 23703731