diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" --- "a/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb" @@ -3,13 +3,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "3174f701", + "id": "fb8bab66", "metadata": { "papermill": { - "duration": 0.005065, - "end_time": "2023-09-06T17:12:47.606560", + "duration": 0.004984, + "end_time": "2023-09-06T18:15:07.813560", "exception": false, - "start_time": "2023-09-06T17:12:47.601495", + "start_time": "2023-09-06T18:15:07.808576", "status": "completed" }, "tags": [] @@ -25,13 +25,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "cb5debdd", + "id": "644a18dd", "metadata": { "papermill": { - "duration": 0.002115, - "end_time": "2023-09-06T17:12:47.612606", + "duration": 0.002407, + "end_time": "2023-09-06T18:15:07.820470", "exception": false, - "start_time": "2023-09-06T17:12:47.610491", + "start_time": "2023-09-06T18:15:07.818063", "status": "completed" }, "tags": [] @@ -43,19 +43,19 @@ { "cell_type": "code", "execution_count": 1, - "id": "41bbf98d", + "id": "88954417", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:12:47.618377Z", - "iopub.status.busy": "2023-09-06T17:12:47.618157Z", - "iopub.status.idle": "2023-09-06T17:12:48.494513Z", - "shell.execute_reply": "2023-09-06T17:12:48.493600Z" + "iopub.execute_input": "2023-09-06T18:15:07.826313Z", + "iopub.status.busy": "2023-09-06T18:15:07.826023Z", + "iopub.status.idle": "2023-09-06T18:15:08.710191Z", + "shell.execute_reply": "2023-09-06T18:15:08.709357Z" }, "papermill": { - "duration": 0.881639, - "end_time": "2023-09-06T17:12:48.496472", + "duration": 0.889532, + "end_time": "2023-09-06T18:15:08.712219", "exception": false, - "start_time": "2023-09-06T17:12:47.614833", + "start_time": "2023-09-06T18:15:07.822687", "status": "completed" }, "tags": [] @@ -83,19 +83,19 @@ { "cell_type": "code", "execution_count": 2, - "id": "bc308e46", + "id": "54728414", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:12:48.506904Z", - "iopub.status.busy": "2023-09-06T17:12:48.506660Z", - "iopub.status.idle": "2023-09-06T17:12:50.610312Z", - "shell.execute_reply": "2023-09-06T17:12:50.609442Z" + "iopub.execute_input": "2023-09-06T18:15:08.723120Z", + "iopub.status.busy": "2023-09-06T18:15:08.722681Z", + "iopub.status.idle": "2023-09-06T18:15:10.844392Z", + "shell.execute_reply": "2023-09-06T18:15:10.843613Z" }, "papermill": { - "duration": 2.110898, - "end_time": "2023-09-06T17:12:50.612132", + "duration": 2.129186, + "end_time": "2023-09-06T18:15:10.846223", "exception": false, - "start_time": "2023-09-06T17:12:48.501234", + "start_time": "2023-09-06T18:15:08.717037", "status": "completed" }, "tags": [] @@ -118,19 +118,19 @@ { "cell_type": "code", "execution_count": 3, - "id": "5ecce62b", + "id": "4e5c05c9", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:12:50.622765Z", - "iopub.status.busy": "2023-09-06T17:12:50.622510Z", - "iopub.status.idle": "2023-09-06T17:12:50.631551Z", - "shell.execute_reply": "2023-09-06T17:12:50.630955Z" + "iopub.execute_input": "2023-09-06T18:15:10.856132Z", + "iopub.status.busy": "2023-09-06T18:15:10.855690Z", + "iopub.status.idle": "2023-09-06T18:15:10.864690Z", + "shell.execute_reply": "2023-09-06T18:15:10.864101Z" }, "papermill": { - "duration": 0.01615, - "end_time": "2023-09-06T17:12:50.633066", + "duration": 0.015147, + "end_time": "2023-09-06T18:15:10.866187", "exception": false, - "start_time": "2023-09-06T17:12:50.616916", + "start_time": "2023-09-06T18:15:10.851040", "status": "completed" }, "tags": [] @@ -140,7 +140,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "DEEPSPEED_STRAT: deepspeed_stage_1\n", + "DEEPSPEED_STRAT:" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " deepspeed_stage_1\n", "ENABLE_WANDB: True\n", "GPU_DEVICES: auto\n", "DIR_NAME: L12-D2048-E1e-1-ctx4k\n", @@ -197,19 +204,19 @@ { "cell_type": "code", "execution_count": 4, - "id": "ecee273d", + "id": "9a735016", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:12:50.643057Z", - "iopub.status.busy": "2023-09-06T17:12:50.642933Z", - "iopub.status.idle": "2023-09-06T17:13:06.898900Z", - "shell.execute_reply": "2023-09-06T17:13:06.898162Z" + "iopub.execute_input": "2023-09-06T18:15:10.875858Z", + "iopub.status.busy": "2023-09-06T18:15:10.875465Z", + "iopub.status.idle": "2023-09-06T18:15:26.773342Z", + "shell.execute_reply": "2023-09-06T18:15:26.772573Z" }, "papermill": { - "duration": 16.262552, - "end_time": "2023-09-06T17:13:06.900660", + "duration": 15.904517, + "end_time": "2023-09-06T18:15:26.775109", "exception": false, - "start_time": "2023-09-06T17:12:50.638108", + "start_time": "2023-09-06T18:15:10.870592", "status": "completed" }, "tags": [] @@ -219,9 +226,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2023-09-06 17:12:50-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 13.33.33.55, 13.33.33.102, 13.33.33.110, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... connected.\r\n", + "--2023-09-06 18:15:10-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 13.33.33.110, 13.33.33.20, 13.33.33.55, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|13.33.33.110|:443... connected.\r\n", "HTTP request sent, awaiting response... " ] }, @@ -230,8 +237,8 @@ "output_type": "stream", "text": [ "302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279570&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTU3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=vlJUg9zPT-Ie2MebXI3t7Sfovkvk30xvNya0WqXvAogwISiGWpmGNd3IKa0rDNdEdrQ3uREbJSFhcam12E5VepvwzlhCsUFsI4W9YnOQ8JOVAtNH5fzk16zGizK7%7EtmvJszRMbwukNZOp6TGz4kqEQPgwAwv26tPs9mP2ATP59hiH30jVnK1yjYot7Y2UAC6vKBdF3%7E%7EZUsL-ZfcYL0lTLE7xPmtgafMs3DM-TJhA1wPXw2r-ByBDo2l6edDKcosW36ncjch5kT5XXrnmxEhX4Yll0kAYuwvfXZI2AsIfeopfeKyYhg0KKeAwrPaxHzAcfQSHQn%7EVIjtW-Ro-8XAUw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-09-06 17:12:51-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279570&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTU3MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=vlJUg9zPT-Ie2MebXI3t7Sfovkvk30xvNya0WqXvAogwISiGWpmGNd3IKa0rDNdEdrQ3uREbJSFhcam12E5VepvwzlhCsUFsI4W9YnOQ8JOVAtNH5fzk16zGizK7%7EtmvJszRMbwukNZOp6TGz4kqEQPgwAwv26tPs9mP2ATP59hiH30jVnK1yjYot7Y2UAC6vKBdF3%7E%7EZUsL-ZfcYL0lTLE7xPmtgafMs3DM-TJhA1wPXw2r-ByBDo2l6edDKcosW36ncjch5kT5XXrnmxEhX4Yll0kAYuwvfXZI2AsIfeopfeKyYhg0KKeAwrPaxHzAcfQSHQn%7EVIjtW-Ro-8XAUw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694283311&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MzMxMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=umgcHAKKVfM%7Elnvkc01zRnwcLHn3wrQfHfl-f-B8cnLRauI-kG63DbJ3BWLUFwsnUXKFrGoEYt8IW3AjE2J9QanT4tt1Zh34ojm5pdkTt4PvrIoX0iCwHRRIJGgV9h%7EF%7EMpsuweAJiHAbk61U4GSdt3fnpVaAKUKKa-VNDcmS3LwTuOx3gQgTqbTc-9ZMz14QcAVZV%7EgGZO5D1Owr0g0db9eatciOvhG7%7EnN%7ES%7EQIhVVZENXFPv0Ej8Jr11N0lmHdGU%7EBXm0fqUL1lCSaAEwCoIF%7EGrG2gtP049PGM9tapGgm6-4y4HbfsNVIxH-iRn-c2lvkjCpUSIOpMzce6wNvA__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 18:15:11-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fcd2c54e435c74dc2a43bd3bbde6594de9c6937156caf9f72a77137ed3d49539?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L12-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L12-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694283311&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MzMxMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZjZDJjNTRlNDM1Yzc0ZGMyYTQzYmQzYmJkZTY1OTRkZTljNjkzNzE1NmNhZjlmNzJhNzcxMzdlZDNkNDk1Mzk%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=umgcHAKKVfM%7Elnvkc01zRnwcLHn3wrQfHfl-f-B8cnLRauI-kG63DbJ3BWLUFwsnUXKFrGoEYt8IW3AjE2J9QanT4tt1Zh34ojm5pdkTt4PvrIoX0iCwHRRIJGgV9h%7EF%7EMpsuweAJiHAbk61U4GSdt3fnpVaAKUKKa-VNDcmS3LwTuOx3gQgTqbTc-9ZMz14QcAVZV%7EgGZO5D1Owr0g0db9eatciOvhG7%7EnN%7ES%7EQIhVVZENXFPv0Ej8Jr11N0lmHdGU%7EBXm0fqUL1lCSaAEwCoIF%7EGrG2gtP049PGM9tapGgm6-4y4HbfsNVIxH-iRn-c2lvkjCpUSIOpMzce6wNvA__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... " ] }, @@ -239,8 +246,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "18.155.68.98, 18.155.68.128, 18.155.68.94, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.98|:443... connected.\r\n", + "18.155.68.128, 18.155.68.73, 18.155.68.98, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.128|:443... connected.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "HTTP request sent, awaiting response... 200 OK\r\n", "Length: 1721187013 (1.6G) [binary/octet-stream]\r\n", "Saving to: ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’\r\n", @@ -254,7 +267,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D 1%[ ] 21.42M 107MB/s " + " v5r3-L12-D 1%[ ] 21.14M 106MB/s " ] }, { @@ -262,7 +275,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2 2%[ ] 43.83M 110MB/s " + " v5r3-L12-D2 2%[ ] 43.53M 109MB/s " ] }, { @@ -270,7 +283,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D20 4%[ ] 66.17M 110MB/s " + " v5r3-L12-D20 4%[ ] 65.91M 110MB/s " ] }, { @@ -278,7 +291,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D204 5%[> ] 88.57M 111MB/s " + " v5r3-L12-D204 5%[> ] 88.31M 110MB/s " ] }, { @@ -286,7 +299,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048 6%[> ] 110.92M 111MB/s " + " v5r3-L12-D2048 6%[> ] 110.72M 111MB/s " ] }, { @@ -294,7 +307,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048- 8%[> ] 133.36M 111MB/s " + " v5r3-L12-D2048- 8%[> ] 133.11M 111MB/s " ] }, { @@ -302,7 +315,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048-E 9%[> ] 155.76M 111MB/s " + " v5r3-L12-D2048-E 9%[> ] 155.46M 111MB/s " ] }, { @@ -310,7 +323,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048-E0 10%[=> ] 178.16M 111MB/s " + " v5r3-L12-D2048-E0 10%[=> ] 177.86M 111MB/s " ] }, { @@ -318,7 +331,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048-E0_ 12%[=> ] 200.56M 111MB/s " + " v5r3-L12-D2048-E0_ 12%[=> ] 200.27M 111MB/s " ] }, { @@ -326,7 +339,7 @@ "output_type": "stream", "text": [ "\r", - "v5r3-L12-D2048-E0_1 13%[=> ] 222.97M 111MB/s " + "v5r3-L12-D2048-E0_1 13%[=> ] 222.66M 111MB/s " ] }, { @@ -334,7 +347,7 @@ "output_type": "stream", "text": [ "\r", - "5r3-L12-D2048-E0_1- 14%[=> ] 244.89M 111MB/s " + "5r3-L12-D2048-E0_1- 14%[=> ] 245.07M 111MB/s " ] }, { @@ -342,7 +355,7 @@ "output_type": "stream", "text": [ "\r", - "r3-L12-D2048-E0_1-e 16%[==> ] 267.32M 111MB/s " + "r3-L12-D2048-E0_1-e 16%[==> ] 267.48M 111MB/s " ] }, { @@ -350,7 +363,7 @@ "output_type": "stream", "text": [ "\r", - "3-L12-D2048-E0_1-en 17%[==> ] 289.71M 111MB/s " + "3-L12-D2048-E0_1-en 17%[==> ] 289.89M 111MB/s " ] }, { @@ -358,7 +371,7 @@ "output_type": "stream", "text": [ "\r", - "-L12-D2048-E0_1-enw 19%[==> ] 312.11M 111MB/s " + "-L12-D2048-E0_1-enw 19%[==> ] 312.30M 112MB/s " ] }, { @@ -366,7 +379,7 @@ "output_type": "stream", "text": [ "\r", - "L12-D2048-E0_1-enwi 20%[===> ] 334.51M 111MB/s eta 12s " + "L12-D2048-E0_1-enwi 20%[===> ] 334.71M 112MB/s eta 12s " ] }, { @@ -374,7 +387,7 @@ "output_type": "stream", "text": [ "\r", - "12-D2048-E0_1-enwik 21%[===> ] 356.91M 112MB/s eta 12s " + "12-D2048-E0_1-enwik 21%[===> ] 357.11M 112MB/s eta 12s " ] }, { @@ -382,7 +395,7 @@ "output_type": "stream", "text": [ "\r", - "2-D2048-E0_1-enwiki 23%[===> ] 379.31M 112MB/s eta 12s " + "2-D2048-E0_1-enwiki 23%[===> ] 379.51M 112MB/s eta 12s " ] }, { @@ -390,7 +403,7 @@ "output_type": "stream", "text": [ "\r", - "-D2048-E0_1-enwiki- 24%[===> ] 401.72M 112MB/s eta 12s " + "-D2048-E0_1-enwiki- 24%[===> ] 401.92M 112MB/s eta 12s " ] }, { @@ -398,7 +411,7 @@ "output_type": "stream", "text": [ "\r", - "D2048-E0_1-enwiki-4 25%[====> ] 424.13M 112MB/s eta 12s " + "D2048-E0_1-enwiki-4 25%[====> ] 424.32M 112MB/s eta 12s " ] }, { @@ -406,7 +419,7 @@ "output_type": "stream", "text": [ "\r", - "2048-E0_1-enwiki-4k 27%[====> ] 446.53M 112MB/s eta 11s " + "2048-E0_1-enwiki-4k 27%[====> ] 446.72M 112MB/s eta 11s " ] }, { @@ -414,7 +427,7 @@ "output_type": "stream", "text": [ "\r", - "048-E0_1-enwiki-4k. 28%[====> ] 468.94M 112MB/s eta 11s " + "048-E0_1-enwiki-4k. 28%[====> ] 469.12M 112MB/s eta 11s " ] }, { @@ -422,7 +435,7 @@ "output_type": "stream", "text": [ "\r", - "48-E0_1-enwiki-4k.p 29%[====> ] 491.34M 112MB/s eta 11s " + "48-E0_1-enwiki-4k.p 29%[====> ] 491.52M 112MB/s eta 11s " ] }, { @@ -430,7 +443,7 @@ "output_type": "stream", "text": [ "\r", - "8-E0_1-enwiki-4k.pt 31%[=====> ] 513.75M 112MB/s eta 11s " + "8-E0_1-enwiki-4k.pt 31%[=====> ] 513.93M 112MB/s eta 11s " ] }, { @@ -438,7 +451,7 @@ "output_type": "stream", "text": [ "\r", - "-E0_1-enwiki-4k.pth 32%[=====> ] 536.15M 112MB/s eta 11s " + "-E0_1-enwiki-4k.pth 32%[=====> ] 536.33M 112MB/s eta 11s " ] }, { @@ -446,7 +459,7 @@ "output_type": "stream", "text": [ "\r", - "E0_1-enwiki-4k.pth 34%[=====> ] 558.51M 112MB/s eta 10s " + "E0_1-enwiki-4k.pth 34%[=====> ] 558.73M 112MB/s eta 10s " ] }, { @@ -454,7 +467,7 @@ "output_type": "stream", "text": [ "\r", - "0_1-enwiki-4k.pth 35%[======> ] 580.91M 112MB/s eta 10s " + "0_1-enwiki-4k.pth 35%[======> ] 581.14M 112MB/s eta 10s " ] }, { @@ -462,7 +475,7 @@ "output_type": "stream", "text": [ "\r", - "_1-enwiki-4k.pth 36%[======> ] 603.30M 112MB/s eta 10s " + "_1-enwiki-4k.pth 36%[======> ] 602.65M 112MB/s eta 10s " ] }, { @@ -470,7 +483,7 @@ "output_type": "stream", "text": [ "\r", - "1-enwiki-4k.pth 38%[======> ] 625.71M 112MB/s eta 10s " + "1-enwiki-4k.pth 38%[======> ] 624.87M 112MB/s eta 10s " ] }, { @@ -478,7 +491,7 @@ "output_type": "stream", "text": [ "\r", - "-enwiki-4k.pth 39%[======> ] 648.10M 112MB/s eta 10s " + "-enwiki-4k.pth 39%[======> ] 647.26M 112MB/s eta 10s " ] }, { @@ -486,7 +499,7 @@ "output_type": "stream", "text": [ "\r", - "enwiki-4k.pth 40%[=======> ] 670.50M 112MB/s eta 9s " + "enwiki-4k.pth 40%[=======> ] 669.68M 112MB/s eta 9s " ] }, { @@ -494,7 +507,7 @@ "output_type": "stream", "text": [ "\r", - "nwiki-4k.pth 42%[=======> ] 692.78M 112MB/s eta 9s " + "nwiki-4k.pth 42%[=======> ] 692.08M 112MB/s eta 9s " ] }, { @@ -502,7 +515,7 @@ "output_type": "stream", "text": [ "\r", - "wiki-4k.pth 43%[=======> ] 715.16M 112MB/s eta 9s " + "wiki-4k.pth 43%[=======> ] 714.45M 112MB/s eta 9s " ] }, { @@ -510,7 +523,7 @@ "output_type": "stream", "text": [ "\r", - "iki-4k.pth 44%[=======> ] 737.57M 112MB/s eta 9s " + "iki-4k.pth 44%[=======> ] 736.88M 112MB/s eta 9s " ] }, { @@ -518,7 +531,7 @@ "output_type": "stream", "text": [ "\r", - "ki-4k.pth 46%[========> ] 759.99M 112MB/s eta 9s " + "ki-4k.pth 46%[========> ] 759.32M 112MB/s eta 9s " ] }, { @@ -526,7 +539,7 @@ "output_type": "stream", "text": [ "\r", - "i-4k.pth 47%[========> ] 782.39M 112MB/s eta 8s " + "i-4k.pth 47%[========> ] 781.71M 112MB/s eta 8s " ] }, { @@ -534,7 +547,7 @@ "output_type": "stream", "text": [ "\r", - "-4k.pth 49%[========> ] 804.80M 112MB/s eta 8s " + "-4k.pth 48%[========> ] 804.12M 112MB/s eta 8s " ] }, { @@ -542,7 +555,7 @@ "output_type": "stream", "text": [ "\r", - "4k.pth 50%[=========> ] 827.18M 112MB/s eta 8s " + "4k.pth 50%[=========> ] 826.52M 112MB/s eta 8s " ] }, { @@ -550,7 +563,7 @@ "output_type": "stream", "text": [ "\r", - "k.pth 51%[=========> ] 849.60M 112MB/s eta 8s " + "k.pth 51%[=========> ] 848.92M 112MB/s eta 8s " ] }, { @@ -558,7 +571,7 @@ "output_type": "stream", "text": [ "\r", - ".pth 53%[=========> ] 872.00M 112MB/s eta 8s " + ".pth 53%[=========> ] 871.34M 112MB/s eta 8s " ] }, { @@ -566,7 +579,7 @@ "output_type": "stream", "text": [ "\r", - "pth 54%[=========> ] 894.41M 112MB/s eta 7s " + "pth 54%[=========> ] 893.74M 112MB/s eta 7s " ] }, { @@ -574,7 +587,7 @@ "output_type": "stream", "text": [ "\r", - "th 55%[==========> ] 916.82M 112MB/s eta 7s " + "th 55%[==========> ] 916.15M 112MB/s eta 7s " ] }, { @@ -582,7 +595,7 @@ "output_type": "stream", "text": [ "\r", - "h 57%[==========> ] 939.22M 112MB/s eta 7s " + "h 57%[==========> ] 938.55M 112MB/s eta 7s " ] }, { @@ -590,7 +603,7 @@ "output_type": "stream", "text": [ "\r", - " 58%[==========> ] 961.63M 112MB/s eta 7s " + " 58%[==========> ] 960.96M 112MB/s eta 7s " ] }, { @@ -598,7 +611,7 @@ "output_type": "stream", "text": [ "\r", - " v 59%[==========> ] 984.03M 112MB/s eta 7s " + " v 59%[==========> ] 983.35M 112MB/s eta 7s " ] }, { @@ -662,7 +675,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12- 69%[============> ] 1.11G 103MB/s eta 5s " + " v5r3-L12- 69%[============> ] 1.11G 104MB/s eta 5s " ] }, { @@ -670,7 +683,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D 70%[=============> ] 1.13G 101MB/s eta 5s " + " v5r3-L12-D 70%[=============> ] 1.13G 105MB/s eta 5s " ] }, { @@ -678,7 +691,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2 71%[=============> ] 1.14G 98.7MB/s eta 5s " + " v5r3-L12-D2 72%[=============> ] 1.16G 105MB/s eta 5s " ] }, { @@ -686,7 +699,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D20 72%[=============> ] 1.16G 96.7MB/s eta 4s " + " v5r3-L12-D20 73%[=============> ] 1.18G 104MB/s eta 4s " ] }, { @@ -694,7 +707,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D204 73%[=============> ] 1.17G 93.8MB/s eta 4s " + " v5r3-L12-D204 74%[=============> ] 1.20G 105MB/s eta 4s " ] }, { @@ -702,7 +715,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048 74%[=============> ] 1.19G 93.4MB/s eta 4s " + " v5r3-L12-D2048 76%[==============> ] 1.22G 105MB/s eta 4s " ] }, { @@ -710,7 +723,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048- 75%[==============> ] 1.21G 91.2MB/s eta 4s " + " v5r3-L12-D2048- 77%[==============> ] 1.24G 104MB/s eta 4s " ] }, { @@ -718,7 +731,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048-E 76%[==============> ] 1.22G 89.5MB/s eta 4s " + " v5r3-L12-D2048-E 79%[==============> ] 1.27G 105MB/s eta 4s " ] }, { @@ -726,7 +739,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048-E0 77%[==============> ] 1.24G 89.3MB/s eta 3s " + " v5r3-L12-D2048-E0 80%[===============> ] 1.29G 105MB/s eta 3s " ] }, { @@ -734,7 +747,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L12-D2048-E0_ 78%[==============> ] 1.26G 85.2MB/s eta 3s " + " v5r3-L12-D2048-E0_ 81%[===============> ] 1.31G 104MB/s eta 3s " ] }, { @@ -742,7 +755,7 @@ "output_type": "stream", "text": [ "\r", - "v5r3-L12-D2048-E0_1 79%[==============> ] 1.28G 85.1MB/s eta 3s " + "v5r3-L12-D2048-E0_1 83%[===============> ] 1.33G 105MB/s eta 3s " ] }, { @@ -750,7 +763,7 @@ "output_type": "stream", "text": [ "\r", - "5r3-L12-D2048-E0_1- 81%[===============> ] 1.30G 85.5MB/s eta 3s " + "5r3-L12-D2048-E0_1- 84%[===============> ] 1.35G 105MB/s eta 3s " ] }, { @@ -758,7 +771,7 @@ "output_type": "stream", "text": [ "\r", - "r3-L12-D2048-E0_1-e 82%[===============> ] 1.32G 84.6MB/s eta 3s " + "r3-L12-D2048-E0_1-e 85%[================> ] 1.38G 104MB/s eta 3s " ] }, { @@ -766,7 +779,7 @@ "output_type": "stream", "text": [ "\r", - "3-L12-D2048-E0_1-en 83%[===============> ] 1.34G 85.0MB/s eta 3s " + "3-L12-D2048-E0_1-en 87%[================> ] 1.40G 103MB/s eta 2s " ] }, { @@ -774,7 +787,7 @@ "output_type": "stream", "text": [ "\r", - "-L12-D2048-E0_1-enw 85%[================> ] 1.37G 86.9MB/s eta 3s " + "-L12-D2048-E0_1-enw 88%[================> ] 1.42G 110MB/s eta 2s " ] }, { @@ -782,7 +795,7 @@ "output_type": "stream", "text": [ "\r", - "L12-D2048-E0_1-enwi 86%[================> ] 1.39G 92.5MB/s eta 3s " + "L12-D2048-E0_1-enwi 89%[================> ] 1.44G 111MB/s eta 2s " ] }, { @@ -790,7 +803,7 @@ "output_type": "stream", "text": [ "\r", - "12-D2048-E0_1-enwik 87%[================> ] 1.41G 94.5MB/s eta 3s " + "12-D2048-E0_1-enwik 91%[=================> ] 1.46G 111MB/s eta 2s " ] }, { @@ -798,7 +811,7 @@ "output_type": "stream", "text": [ "\r", - "2-D2048-E0_1-enwiki 89%[================> ] 1.43G 96.2MB/s eta 3s " + "2-D2048-E0_1-enwiki 92%[=================> ] 1.48G 110MB/s eta 2s " ] }, { @@ -806,7 +819,7 @@ "output_type": "stream", "text": [ "\r", - "-D2048-E0_1-enwiki- 90%[=================> ] 1.45G 99.5MB/s eta 1s " + "-D2048-E0_1-enwiki- 93%[=================> ] 1.50G 110MB/s eta 1s " ] }, { @@ -814,7 +827,7 @@ "output_type": "stream", "text": [ "\r", - "D2048-E0_1-enwiki-4 92%[=================> ] 1.47G 101MB/s eta 1s " + "D2048-E0_1-enwiki-4 95%[==================> ] 1.52G 110MB/s eta 1s " ] }, { @@ -822,7 +835,7 @@ "output_type": "stream", "text": [ "\r", - "2048-E0_1-enwiki-4k 93%[=================> ] 1.50G 104MB/s eta 1s " + "2048-E0_1-enwiki-4k 96%[==================> ] 1.55G 110MB/s eta 1s " ] }, { @@ -830,7 +843,7 @@ "output_type": "stream", "text": [ "\r", - "048-E0_1-enwiki-4k. 94%[=================> ] 1.52G 105MB/s eta 1s " + "048-E0_1-enwiki-4k. 97%[==================> ] 1.57G 110MB/s eta 1s " ] }, { @@ -838,7 +851,7 @@ "output_type": "stream", "text": [ "\r", - "48-E0_1-enwiki-4k.p 96%[==================> ] 1.54G 107MB/s eta 1s " + "48-E0_1-enwiki-4k.p 99%[==================> ] 1.59G 110MB/s eta 1s " ] }, { @@ -846,25 +859,9 @@ "output_type": "stream", "text": [ "\r", - "8-E0_1-enwiki-4k.pt 97%[==================> ] 1.56G 107MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "-E0_1-enwiki-4k.pth 98%[==================> ] 1.58G 111MB/s eta 0s " - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "v5r3-L12-D2048-E0_1 100%[===================>] 1.60G 112MB/s in 15s \r\n", + "v5r3-L12-D2048-E0_1 100%[===================>] 1.60G 110MB/s in 15s \r\n", "\r\n", - "2023-09-06 17:13:06 (106 MB/s) - ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’ saved [1721187013/1721187013]\r\n", + "2023-09-06 18:15:26 (110 MB/s) - ‘v5r3-L12-D2048-E0_1-enwiki-4k.pth’ saved [1721187013/1721187013]\r\n", "\r\n" ] }, @@ -872,9 +869,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "total 1.5G\r\n", - "drwxr-xr-x 2 root root 3 Sep 6 17:12 .\r\n", - "drwxr-xr-x 20 root root 24 Sep 6 17:12 ..\r\n", + "total 1.6G\r\n", + "drwxr-xr-x 2 root root 3 Sep 6 18:15 .\r\n", + "drwxr-xr-x 20 root root 24 Sep 6 18:15 ..\r\n", "-rw-r--r-- 1 root root 1.7G Sep 6 15:04 v5r3-L12-D2048-E0_1-enwiki-4k.pth\r\n" ] } @@ -891,13 +888,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "326602ab", + "id": "2b6d6b9e", "metadata": { "papermill": { - "duration": 0.005225, - "end_time": "2023-09-06T17:13:06.914108", + "duration": 0.005279, + "end_time": "2023-09-06T18:15:26.791307", "exception": false, - "start_time": "2023-09-06T17:13:06.908883", + "start_time": "2023-09-06T18:15:26.786028", "status": "completed" }, "tags": [] @@ -909,19 +906,19 @@ { "cell_type": "code", "execution_count": 5, - "id": "e3aa35e9", + "id": "ec611ca0", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:13:06.925901Z", - "iopub.status.busy": "2023-09-06T17:13:06.925655Z", - "iopub.status.idle": "2023-09-06T17:13:14.291842Z", - "shell.execute_reply": "2023-09-06T17:13:14.291053Z" + "iopub.execute_input": "2023-09-06T18:15:26.803278Z", + "iopub.status.busy": "2023-09-06T18:15:26.803020Z", + "iopub.status.idle": "2023-09-06T18:15:34.152162Z", + "shell.execute_reply": "2023-09-06T18:15:34.151364Z" }, "papermill": { - "duration": 7.374402, - "end_time": "2023-09-06T17:13:14.293884", + "duration": 7.357428, + "end_time": "2023-09-06T18:15:34.154153", "exception": false, - "start_time": "2023-09-06T17:13:06.919482", + "start_time": "2023-09-06T18:15:26.796725", "status": "completed" }, "tags": [] @@ -940,12 +937,12 @@ "output_type": "stream", "text": [ "\r", - "Saving the dataset (1/1 shards): 100%|█| 14932/14932 [00:00<00:00, 223458.37 exa\r", - "Saving the dataset (1/1 shards): 100%|█| 14932/14932 [00:00<00:00, 222084.37 exa\r\n", + "Saving the dataset (1/1 shards): 100%|█| 14932/14932 [00:00<00:00, 229773.44 exa\r", + "Saving the dataset (1/1 shards): 100%|█| 14932/14932 [00:00<00:00, 228347.58 exa\r\n", "\r", "Saving the dataset (0/1 shards): 0%| | 0/76 [00:00\r\n", " cli_main()\r\n", @@ -140335,10 +140141,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/uklb27ld\u001b[0m\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v25\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L12-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f9fnknh5\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v32\u001b[0m\r\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_173728-uklb27ld/logs\u001b[0m\r\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_183940-f9fnknh5/logs\u001b[0m\r\n" ] } ], @@ -140363,19 +140169,19 @@ { "cell_type": "code", "execution_count": 16, - "id": "3c4e1a84", + "id": "f9b70eea", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:37:42.501242Z", - "iopub.status.busy": "2023-09-06T17:37:42.500756Z", - "iopub.status.idle": "2023-09-06T17:37:44.926179Z", - "shell.execute_reply": "2023-09-06T17:37:44.925416Z" + "iopub.execute_input": "2023-09-06T18:39:54.579000Z", + "iopub.status.busy": "2023-09-06T18:39:54.578746Z", + "iopub.status.idle": "2023-09-06T18:39:57.009190Z", + "shell.execute_reply": "2023-09-06T18:39:57.008417Z" }, "papermill": { - "duration": 3.062059, - "end_time": "2023-09-06T17:37:44.927885", + "duration": 3.068353, + "end_time": "2023-09-06T18:39:57.010860", "exception": false, - "start_time": "2023-09-06T17:37:41.865826", + "start_time": "2023-09-06T18:39:53.942507", "status": "completed" }, "tags": [] @@ -140385,7 +140191,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2023-09-06 17:37:44,062] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + "[2023-09-06 18:39:56,146] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" ] }, { @@ -140422,19 +140228,19 @@ { "cell_type": "code", "execution_count": 17, - "id": "ff1e2d52", + "id": "01fca15c", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:37:46.169833Z", - "iopub.status.busy": "2023-09-06T17:37:46.169341Z", - "iopub.status.idle": "2023-09-06T17:37:46.403204Z", - "shell.execute_reply": "2023-09-06T17:37:46.402355Z" + "iopub.execute_input": "2023-09-06T18:39:58.256313Z", + "iopub.status.busy": "2023-09-06T18:39:58.256022Z", + "iopub.status.idle": "2023-09-06T18:39:58.488872Z", + "shell.execute_reply": "2023-09-06T18:39:58.488054Z" }, "papermill": { - "duration": 0.870871, - "end_time": "2023-09-06T17:37:46.404923", + "duration": 0.874819, + "end_time": "2023-09-06T18:39:58.490725", "exception": false, - "start_time": "2023-09-06T17:37:45.534052", + "start_time": "2023-09-06T18:39:57.615906", "status": "completed" }, "tags": [] @@ -140474,14 +140280,14 @@ }, "papermill": { "default_parameters": {}, - "duration": 1500.445098, - "end_time": "2023-09-06T17:37:47.161793", + "duration": 1492.297607, + "end_time": "2023-09-06T18:39:59.213287", "environment_variables": {}, "exception": null, "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/part2.ipynb", "parameters": {}, - "start_time": "2023-09-06T17:12:46.716695", + "start_time": "2023-09-06T18:15:06.915680", "version": "2.4.0" } },