diff --git "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb" "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb" --- "a/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb" +++ "b/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb" @@ -3,13 +3,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "6303a399", + "id": "718f1c9a", "metadata": { "papermill": { - "duration": 0.005464, - "end_time": "2023-09-06T17:17:07.017280", + "duration": 0.005477, + "end_time": "2023-09-06T17:24:46.367615", "exception": false, - "start_time": "2023-09-06T17:17:07.011816", + "start_time": "2023-09-06T17:24:46.362138", "status": "completed" }, "tags": [] @@ -25,13 +25,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "f39c274d", + "id": "3237126c", "metadata": { "papermill": { - "duration": 0.004562, - "end_time": "2023-09-06T17:17:07.026887", + "duration": 0.004583, + "end_time": "2023-09-06T17:24:46.377203", "exception": false, - "start_time": "2023-09-06T17:17:07.022325", + "start_time": "2023-09-06T17:24:46.372620", "status": "completed" }, "tags": [] @@ -43,19 +43,19 @@ { "cell_type": "code", "execution_count": 1, - "id": "339857f4", + "id": "3b172c13", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:17:07.038414Z", - "iopub.status.busy": "2023-09-06T17:17:07.037888Z", - "iopub.status.idle": "2023-09-06T17:17:08.034427Z", - "shell.execute_reply": "2023-09-06T17:17:08.033578Z" + "iopub.execute_input": "2023-09-06T17:24:46.388594Z", + "iopub.status.busy": "2023-09-06T17:24:46.388114Z", + "iopub.status.idle": "2023-09-06T17:24:47.401851Z", + "shell.execute_reply": "2023-09-06T17:24:47.400352Z" }, "papermill": { - "duration": 1.005021, - "end_time": "2023-09-06T17:17:08.036897", + "duration": 1.022435, + "end_time": "2023-09-06T17:24:47.404551", "exception": false, - "start_time": "2023-09-06T17:17:07.031876", + "start_time": "2023-09-06T17:24:46.382116", "status": "completed" }, "tags": [] @@ -65,10 +65,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize32 checkpoint\tnotebook\r\n", - "LICENSE RWKV-v5\t\t RWKV-v5r2\t datapath\toutput\r\n", - "README.md RWKV-v5altwavenet RWKV-v5rstack\t docker\r\n", - "RWKV-v4neo RWKV-v5headsize2x RWKV-v5wavenet model\r\n" + "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n", + "LICENSE RWKV-v5\t\t RWKV-v5headsize32 datapath\toutput\r\n", + "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n", + "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n" ] } ], @@ -83,19 +83,19 @@ { "cell_type": "code", "execution_count": 2, - "id": "7b45aafc", + "id": "6a5b2ca0", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:17:08.049253Z", - "iopub.status.busy": "2023-09-06T17:17:08.048613Z", - "iopub.status.idle": "2023-09-06T17:17:11.256715Z", - "shell.execute_reply": "2023-09-06T17:17:11.255796Z" + "iopub.execute_input": "2023-09-06T17:24:47.418082Z", + "iopub.status.busy": "2023-09-06T17:24:47.416819Z", + "iopub.status.idle": "2023-09-06T17:24:50.719845Z", + "shell.execute_reply": "2023-09-06T17:24:50.718258Z" }, "papermill": { - "duration": 3.217287, - "end_time": "2023-09-06T17:17:11.259293", + "duration": 3.312823, + "end_time": "2023-09-06T17:24:50.722482", "exception": false, - "start_time": "2023-09-06T17:17:08.042006", + "start_time": "2023-09-06T17:24:47.409659", "status": "completed" }, "tags": [] @@ -118,19 +118,19 @@ { "cell_type": "code", "execution_count": 3, - "id": "5a6fa0ec", + "id": "c7c5f2ce", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:17:11.271967Z", - "iopub.status.busy": "2023-09-06T17:17:11.271338Z", - "iopub.status.idle": "2023-09-06T17:17:11.279590Z", - "shell.execute_reply": "2023-09-06T17:17:11.278754Z" + "iopub.execute_input": "2023-09-06T17:24:50.736379Z", + "iopub.status.busy": "2023-09-06T17:24:50.735150Z", + "iopub.status.idle": "2023-09-06T17:24:50.748734Z", + "shell.execute_reply": "2023-09-06T17:24:50.747177Z" }, "papermill": { - "duration": 0.016902, - "end_time": "2023-09-06T17:17:11.281556", + "duration": 0.023207, + "end_time": "2023-09-06T17:24:50.750984", "exception": false, - "start_time": "2023-09-06T17:17:11.264654", + "start_time": "2023-09-06T17:24:50.727777", "status": "completed" }, "tags": [] @@ -197,19 +197,19 @@ { "cell_type": "code", "execution_count": 4, - "id": "5715cb46", + "id": "23272857", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:17:11.294198Z", - "iopub.status.busy": "2023-09-06T17:17:11.293662Z", - "iopub.status.idle": "2023-09-06T17:17:31.891567Z", - "shell.execute_reply": "2023-09-06T17:17:31.890659Z" + "iopub.execute_input": "2023-09-06T17:24:50.764946Z", + "iopub.status.busy": "2023-09-06T17:24:50.764040Z", + "iopub.status.idle": "2023-09-06T17:25:12.804705Z", + "shell.execute_reply": "2023-09-06T17:25:12.803903Z" }, "papermill": { - "duration": 20.606807, - "end_time": "2023-09-06T17:17:31.893643", + "duration": 22.05082, + "end_time": "2023-09-06T17:25:12.807245", "exception": false, - "start_time": "2023-09-06T17:17:11.286836", + "start_time": "2023-09-06T17:24:50.756425", "status": "completed" }, "tags": [] @@ -219,14 +219,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2023-09-06 17:17:11-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth\r\n", - "Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.67, 18.154.227.69, ...\r\n", - "Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.\r\n", + "--2023-09-06 17:24:50-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-4k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 18.154.227.67, 18.154.227.69, 18.154.227.87, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|18.154.227.67|:443... connected.\r\n", "HTTP request sent, awaiting response... 302 Found\r\n", - "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279831&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTgzMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzdlYjdhYmZkYTJlNGNmYjJhOTYxYmE0ZDUyNTY0ZjliMzMwODMwYmExYTgzNjk2NjU1NmUyODc1MzQ2OGVhMWU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=iCSVplgSsG5AjmmXo591m6nLTcfvPpRlnGWiePyk13mX8KhpZ9pdk-CgfNOOMeUtGGTe7Dax%7EqOKvdtmYwBAGLh94g2O8F3Enb9ju0hOqXZa8Z0LoWk7-vfWY-DupCpdaUAS-vgVDxUcUL-VULBW2JnqqXO0Vzm2TaBAOxRSY6u86wfuSrMzfuxu7vPtGKnxU2tO8CFkltBdkumTlWJ8KSHLx8A0BfIn23aR2Wzq77%7EmXJ7dJvBt19%7EiS5p6m-ITw0yXoKhohGLmiubk4%7EmK543ibTfPBbRtxjkxzepwDwuOCLomwXnaGvmVfBjnrHa1ETJTjkro9qITPESxU6TOxg__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", - "--2023-09-06 17:17:11-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694279831&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI3OTgzMX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzdlYjdhYmZkYTJlNGNmYjJhOTYxYmE0ZDUyNTY0ZjliMzMwODMwYmExYTgzNjk2NjU1NmUyODc1MzQ2OGVhMWU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=iCSVplgSsG5AjmmXo591m6nLTcfvPpRlnGWiePyk13mX8KhpZ9pdk-CgfNOOMeUtGGTe7Dax%7EqOKvdtmYwBAGLh94g2O8F3Enb9ju0hOqXZa8Z0LoWk7-vfWY-DupCpdaUAS-vgVDxUcUL-VULBW2JnqqXO0Vzm2TaBAOxRSY6u86wfuSrMzfuxu7vPtGKnxU2tO8CFkltBdkumTlWJ8KSHLx8A0BfIn23aR2Wzq77%7EmXJ7dJvBt19%7EiS5p6m-ITw0yXoKhohGLmiubk4%7EmK543ibTfPBbRtxjkxzepwDwuOCLomwXnaGvmVfBjnrHa1ETJTjkro9qITPESxU6TOxg__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", - "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 108.138.64.121, 108.138.64.49, 108.138.64.36, ...\r\n", - "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.121|:443... connected.\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694280290&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MDI5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzdlYjdhYmZkYTJlNGNmYjJhOTYxYmE0ZDUyNTY0ZjliMzMwODMwYmExYTgzNjk2NjU1NmUyODc1MzQ2OGVhMWU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=l0Mwep-i6GNV-J%7EepezD7A17T72n6mA%7ENVRke24jJ9%7E2CDFf-7C7BUXFmpr2PCyka%7EO123V-aSM9kVMGZj6QIErLtWvw%7ER6iQmC9OFwIRUHp3HyFg-ZkMVj-b97ycZB2mCm3DPehloQrbgQkQcZqzyKTY5kK34eUVuSFcD%7EyM8V7vCuFr5fzKzGw87ji5hdxrxJJ5JbLMqcbtq-dlHHgzDtDI5bFsES5DOVLV0Lk02gg2fU-KxeCXDMPU3MTSuaUky2kQQgy4r2%7ENv20mFp5lSIuedQ2-kCzA8A%7EY50E9EP5qpkWGRBOE7Q52xZVZfwZ6GgXmiz0hw1a1XW0W27C5A__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-09-06 17:24:50-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/7eb7abfda2e4cfb2a961ba4d52564f9b330830ba1a836966556e28753468ea1e?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-4k.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-4k.pth%22%3B&Expires=1694280290&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDI4MDI5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzdlYjdhYmZkYTJlNGNmYjJhOTYxYmE0ZDUyNTY0ZjliMzMwODMwYmExYTgzNjk2NjU1NmUyODc1MzQ2OGVhMWU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=l0Mwep-i6GNV-J%7EepezD7A17T72n6mA%7ENVRke24jJ9%7E2CDFf-7C7BUXFmpr2PCyka%7EO123V-aSM9kVMGZj6QIErLtWvw%7ER6iQmC9OFwIRUHp3HyFg-ZkMVj-b97ycZB2mCm3DPehloQrbgQkQcZqzyKTY5kK34eUVuSFcD%7EyM8V7vCuFr5fzKzGw87ji5hdxrxJJ5JbLMqcbtq-dlHHgzDtDI5bFsES5DOVLV0Lk02gg2fU-KxeCXDMPU3MTSuaUky2kQQgy4r2%7ENv20mFp5lSIuedQ2-kCzA8A%7EY50E9EP5qpkWGRBOE7Q52xZVZfwZ6GgXmiz0hw1a1XW0W27C5A__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 108.138.64.121, 108.138.64.49, 108.138.64.111, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.121|:443... connected.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "HTTP request sent, awaiting response... " ] }, @@ -247,7 +253,39 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2 1%[ ] 15.26M 53.2MB/s " + " v5r3-L6-D2 1%[ ] 14.74M 60.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D20 1%[ ] 15.39M 34.6MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D204 3%[ ] 30.52M 37.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048 4%[ ] 45.78M 38.8MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5r3-L6-D2048- 6%[> ] 61.03M 41.4MB/s " ] }, { @@ -255,7 +293,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D20 3%[ ] 30.52M 46.6MB/s " + " v5r3-L6-D2048-E 7%[> ] 76.29M 45.5MB/s " ] }, { @@ -263,7 +301,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D204 4%[ ] 45.78M 48.4MB/s " + " v5r3-L6-D2048-E0 8%[> ] 91.20M 48.6MB/s " ] }, { @@ -271,7 +309,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048 5%[> ] 60.52M 52.7MB/s " + " v5r3-L6-D2048-E0_ 10%[=> ] 106.29M 49.2MB/s " ] }, { @@ -279,7 +317,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048- 7%[> ] 74.46M 46.7MB/s " + " v5r3-L6-D2048-E0_1 11%[=> ] 113.73M 48.2MB/s " ] }, { @@ -287,7 +325,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E 8%[> ] 91.03M 50.4MB/s " + "v5r3-L6-D2048-E0_1- 12%[=> ] 122.07M 46.1MB/s " ] }, { @@ -295,7 +333,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E0 10%[=> ] 101.80M 47.0MB/s " + "5r3-L6-D2048-E0_1-e 13%[=> ] 139.89M 49.1MB/s " ] }, { @@ -303,7 +341,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E0_ 10%[=> ] 106.81M 44.8MB/s " + "r3-L6-D2048-E0_1-en 15%[==> ] 152.59M 48.2MB/s eta 18s " ] }, { @@ -311,7 +349,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E0_1 11%[=> ] 121.56M 46.7MB/s " + "3-L6-D2048-E0_1-enw 16%[==> ] 167.33M 48.8MB/s eta 18s " ] }, { @@ -319,7 +357,7 @@ "output_type": "stream", "text": [ "\r", - "v5r3-L6-D2048-E0_1- 13%[=> ] 134.53M 48.0MB/s " + "-L6-D2048-E0_1-enwi 16%[==> ] 172.31M 47.5MB/s eta 18s " ] }, { @@ -327,7 +365,7 @@ "output_type": "stream", "text": [ "\r", - "5r3-L6-D2048-E0_1-e 14%[=> ] 151.99M 50.6MB/s eta 17s " + "L6-D2048-E0_1-enwik 18%[==> ] 183.10M 47.5MB/s eta 18s " ] }, { @@ -335,7 +373,7 @@ "output_type": "stream", "text": [ "\r", - "r3-L6-D2048-E0_1-en 15%[==> ] 155.00M 48.4MB/s eta 17s " + "6-D2048-E0_1-enwiki 19%[==> ] 198.36M 47.4MB/s eta 17s " ] }, { @@ -343,7 +381,7 @@ "output_type": "stream", "text": [ "\r", - "3-L6-D2048-E0_1-enw 16%[==> ] 167.85M 48.7MB/s eta 17s " + "-D2048-E0_1-enwiki- 20%[===> ] 210.46M 48.9MB/s eta 17s " ] }, { @@ -351,7 +389,7 @@ "output_type": "stream", "text": [ "\r", - "-L6-D2048-E0_1-enwi 18%[==> ] 189.61M 52.0MB/s eta 17s " + "D2048-E0_1-enwiki-4 21%[===> ] 215.68M 46.7MB/s eta 17s " ] }, { @@ -359,7 +397,7 @@ "output_type": "stream", "text": [ "\r", - "L6-D2048-E0_1-enwik 19%[==> ] 202.61M 52.7MB/s eta 17s " + "2048-E0_1-enwiki-4k 23%[===> ] 243.63M 51.0MB/s eta 17s " ] }, { @@ -367,7 +405,7 @@ "output_type": "stream", "text": [ "\r", - "6-D2048-E0_1-enwiki 21%[===> ] 213.62M 48.7MB/s eta 16s " + "048-E0_1-enwiki-4k. 24%[===> ] 244.26M 49.1MB/s eta 17s " ] }, { @@ -375,7 +413,7 @@ "output_type": "stream", "text": [ "\r", - "-D2048-E0_1-enwiki- 22%[===> ] 226.41M 50.0MB/s eta 16s " + "48-E0_1-enwiki-4k.p 25%[====> ] 259.40M 48.4MB/s eta 17s " ] }, { @@ -383,7 +421,7 @@ "output_type": "stream", "text": [ "\r", - "D2048-E0_1-enwiki-4 23%[===> ] 243.63M 50.1MB/s eta 16s " + "8-E0_1-enwiki-4k.pt 27%[====> ] 274.66M 50.2MB/s eta 17s " ] }, { @@ -391,7 +429,7 @@ "output_type": "stream", "text": [ "\r", - "2048-E0_1-enwiki-4k 24%[===> ] 250.70M 50.7MB/s eta 16s " + "-E0_1-enwiki-4k.pth 28%[====> ] 289.92M 50.1MB/s eta 17s " ] }, { @@ -399,7 +437,7 @@ "output_type": "stream", "text": [ "\r", - "048-E0_1-enwiki-4k. 25%[====> ] 259.40M 49.3MB/s eta 16s " + "E0_1-enwiki-4k.pth 30%[=====> ] 305.18M 49.8MB/s eta 17s " ] }, { @@ -407,7 +445,7 @@ "output_type": "stream", "text": [ "\r", - "48-E0_1-enwiki-4k.p 27%[====> ] 274.66M 52.3MB/s eta 15s " + "0_1-enwiki-4k.pth 31%[=====> ] 322.50M 52.4MB/s eta 14s " ] }, { @@ -415,7 +453,7 @@ "output_type": "stream", "text": [ "\r", - "8-E0_1-enwiki-4k.pt 27%[====> ] 281.34M 50.0MB/s eta 15s " + "_1-enwiki-4k.pth 32%[=====> ] 335.18M 52.3MB/s eta 14s " ] }, { @@ -423,7 +461,7 @@ "output_type": "stream", "text": [ "\r", - "-E0_1-enwiki-4k.pth 28%[====> ] 289.92M 50.7MB/s eta 15s " + "1-enwiki-4k.pth 34%[=====> ] 350.44M 47.2MB/s eta 14s " ] }, { @@ -431,7 +469,7 @@ "output_type": "stream", "text": [ "\r", - "E0_1-enwiki-4k.pth 30%[=====> ] 305.18M 50.2MB/s eta 15s " + "-enwiki-4k.pth 35%[======> ] 364.38M 47.1MB/s eta 14s " ] }, { @@ -439,7 +477,7 @@ "output_type": "stream", "text": [ "\r", - "0_1-enwiki-4k.pth 31%[=====> ] 320.43M 51.6MB/s eta 15s " + "enwiki-4k.pth 37%[======> ] 380.15M 50.5MB/s eta 14s " ] }, { @@ -447,7 +485,7 @@ "output_type": "stream", "text": [ "\r", - "_1-enwiki-4k.pth 32%[=====> ] 325.55M 47.9MB/s eta 14s " + "nwiki-4k.pth 38%[======> ] 392.91M 50.5MB/s eta 14s " ] }, { @@ -455,7 +493,7 @@ "output_type": "stream", "text": [ "\r", - "1-enwiki-4k.pth 33%[=====> ] 335.69M 45.4MB/s eta 14s " + "wiki-4k.pth 39%[======> ] 396.73M 47.0MB/s eta 14s " ] }, { @@ -463,7 +501,7 @@ "output_type": "stream", "text": [ "\r", - "-enwiki-4k.pth 34%[=====> ] 350.95M 46.9MB/s eta 14s " + "iki-4k.pth 40%[=======> ] 411.47M 47.9MB/s eta 13s " ] }, { @@ -471,7 +509,7 @@ "output_type": "stream", "text": [ "\r", - "enwiki-4k.pth 37%[======> ] 377.49M 49.4MB/s eta 14s " + "ki-4k.pth 41%[=======> ] 426.73M 50.0MB/s eta 13s " ] }, { @@ -479,7 +517,7 @@ "output_type": "stream", "text": [ "\r", - "nwiki-4k.pth 38%[======> ] 396.22M 50.4MB/s eta 12s " + "i-4k.pth 43%[=======> ] 440.68M 46.6MB/s eta 13s " ] }, { @@ -487,7 +525,7 @@ "output_type": "stream", "text": [ "\r", - "wiki-4k.pth 40%[=======> ] 411.47M 51.8MB/s eta 12s " + "-4k.pth 44%[=======> ] 455.93M 51.3MB/s eta 13s " ] }, { @@ -495,7 +533,7 @@ "output_type": "stream", "text": [ "\r", - "iki-4k.pth 41%[=======> ] 423.97M 52.9MB/s eta 12s " + "4k.pth 45%[========> ] 457.76M 47.6MB/s eta 13s " ] }, { @@ -503,7 +541,7 @@ "output_type": "stream", "text": [ "\r", - "ki-4k.pth 42%[=======> ] 428.21M 50.3MB/s eta 12s " + "k.pth 46%[========> ] 472.50M 46.5MB/s eta 11s " ] }, { @@ -511,7 +549,7 @@ "output_type": "stream", "text": [ "\r", - "i-4k.pth 43%[=======> ] 440.68M 50.5MB/s eta 12s " + ".pth 46%[========> ] 475.08M 45.9MB/s eta 11s " ] }, { @@ -519,7 +557,7 @@ "output_type": "stream", "text": [ "\r", - "-4k.pth 43%[=======> ] 442.66M 50.3MB/s eta 12s " + "pth 47%[========> ] 487.77M 44.0MB/s eta 11s " ] }, { @@ -527,7 +565,7 @@ "output_type": "stream", "text": [ "\r", - "4k.pth 44%[=======> ] 456.72M 49.8MB/s eta 12s " + "th 49%[========> ] 503.54M 43.1MB/s eta 11s " ] }, { @@ -535,7 +573,7 @@ "output_type": "stream", "text": [ "\r", - "k.pth 46%[========> ] 472.50M 51.1MB/s eta 12s " + "h 51%[=========> ] 518.80M 44.5MB/s eta 10s " ] }, { @@ -543,7 +581,7 @@ "output_type": "stream", "text": [ "\r", - ".pth 47%[========> ] 480.25M 51.7MB/s eta 12s " + " 52%[=========> ] 534.05M 47.1MB/s eta 10s " ] }, { @@ -551,7 +589,7 @@ "output_type": "stream", "text": [ "\r", - "pth 48%[========> ] 488.28M 50.6MB/s eta 12s " + " v 54%[=========> ] 549.31M 49.2MB/s eta 10s " ] }, { @@ -559,7 +597,7 @@ "output_type": "stream", "text": [ "\r", - "th 50%[=========> ] 511.80M 53.9MB/s eta 10s " + " v5 55%[==========> ] 564.58M 51.1MB/s eta 10s " ] }, { @@ -567,7 +605,7 @@ "output_type": "stream", "text": [ "\r", - "h 51%[=========> ] 528.83M 53.4MB/s eta 10s " + " v5r 57%[==========> ] 579.83M 49.1MB/s eta 9s " ] }, { @@ -575,7 +613,7 @@ "output_type": "stream", "text": [ "\r", - " 52%[=========> ] 534.05M 54.8MB/s eta 10s " + " v5r3 58%[==========> ] 595.09M 50.9MB/s eta 9s " ] }, { @@ -583,7 +621,7 @@ "output_type": "stream", "text": [ "\r", - " v 54%[=========> ] 549.31M 57.4MB/s eta 10s " + " v5r3- 59%[==========> ] 608.93M 52.9MB/s eta 9s " ] }, { @@ -591,7 +629,7 @@ "output_type": "stream", "text": [ "\r", - " v5 55%[==========> ] 566.76M 59.5MB/s eta 10s " + " v5r3-L 61%[===========> ] 623.04M 52.4MB/s eta 9s " ] }, { @@ -599,7 +637,7 @@ "output_type": "stream", "text": [ "\r", - " v5r 56%[==========> ] 573.51M 56.4MB/s eta 9s " + " v5r3-L6 61%[===========> ] 625.61M 51.2MB/s eta 9s " ] }, { @@ -607,7 +645,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3 57%[==========> ] 579.83M 49.8MB/s eta 9s " + " v5r3-L6- 62%[===========> ] 640.36M 49.9MB/s eta 8s " ] }, { @@ -615,7 +653,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3- 58%[==========> ] 594.57M 48.9MB/s eta 9s " + " v5r3-L6-D 63%[===========> ] 640.87M 50.0MB/s eta 8s " ] }, { @@ -623,7 +661,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L 60%[===========> ] 610.35M 50.6MB/s eta 9s " + " v5r3-L6-D2 64%[===========> ] 655.62M 50.5MB/s eta 8s " ] }, { @@ -631,7 +669,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6 61%[===========> ] 625.61M 51.2MB/s eta 8s " + " v5r3-L6-D20 65%[============> ] 661.31M 51.0MB/s eta 8s " ] }, { @@ -639,7 +677,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6- 63%[===========> ] 640.87M 53.4MB/s eta 8s " + " v5r3-L6-D204 66%[============> ] 671.38M 51.0MB/s eta 8s " ] }, { @@ -647,7 +685,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D 64%[===========> ] 656.13M 53.0MB/s eta 8s " + " v5r3-L6-D2048 67%[============> ] 686.64M 52.9MB/s eta 7s " ] }, { @@ -655,7 +693,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2 66%[============> ] 671.38M 53.7MB/s eta 8s " + " v5r3-L6-D2048- 68%[============> ] 694.35M 51.9MB/s eta 7s " ] }, { @@ -663,7 +701,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D20 67%[============> ] 686.13M 56.4MB/s eta 6s " + " v5r3-L6-D2048-E 68%[============> ] 699.39M 49.5MB/s eta 7s " ] }, { @@ -671,7 +709,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D204 67%[============> ] 686.77M 52.2MB/s eta 6s " + " v5r3-L6-D2048-E0 69%[============> ] 703.65M 46.0MB/s eta 7s " ] }, { @@ -679,7 +717,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048 69%[============> ] 701.90M 52.3MB/s eta 6s " + " v5r3-L6-D2048-E0_ 70%[=============> ] 716.64M 44.8MB/s eta 7s " ] }, { @@ -687,7 +725,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048- 70%[=============> ] 717.16M 51.2MB/s eta 6s " + " v5r3-L6-D2048-E0_1 71%[=============> ] 730.72M 47.1MB/s eta 6s " ] }, { @@ -695,7 +733,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E 72%[=============> ] 732.42M 52.4MB/s eta 6s " + "v5r3-L6-D2048-E0_1- 72%[=============> ] 732.42M 43.0MB/s eta 6s " ] }, { @@ -703,7 +741,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E0 73%[=============> ] 742.67M 51.0MB/s eta 6s " + "5r3-L6-D2048-E0_1-e 72%[=============> ] 736.61M 41.3MB/s eta 6s " ] }, { @@ -711,7 +749,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E0_ 74%[=============> ] 756.60M 50.1MB/s eta 6s " + "r3-L6-D2048-E0_1-en 73%[=============> ] 747.17M 40.4MB/s eta 6s " ] }, { @@ -719,7 +757,7 @@ "output_type": "stream", "text": [ "\r", - " v5r3-L6-D2048-E0_1 75%[==============> ] 771.87M 52.0MB/s eta 6s " + "3-L6-D2048-E0_1-enw 75%[==============> ] 762.94M 41.6MB/s eta 6s " ] }, { @@ -727,7 +765,7 @@ "output_type": "stream", "text": [ "\r", - "v5r3-L6-D2048-E0_1- 76%[==============> ] 781.29M 53.7MB/s eta 6s " + "-L6-D2048-E0_1-enwi 76%[==============> ] 774.56M 40.6MB/s eta 5s " ] }, { @@ -735,7 +773,7 @@ "output_type": "stream", "text": [ "\r", - "5r3-L6-D2048-E0_1-e 78%[==============> ] 793.46M 52.7MB/s eta 4s " + "L6-D2048-E0_1-enwik 77%[==============> ] 789.24M 44.0MB/s eta 5s " ] }, { @@ -743,7 +781,7 @@ "output_type": "stream", "text": [ "\r", - "r3-L6-D2048-E0_1-en 79%[==============> ] 808.71M 51.9MB/s eta 4s " + "6-D2048-E0_1-enwiki 78%[==============> ] 793.46M 41.0MB/s eta 5s " ] }, { @@ -751,7 +789,7 @@ "output_type": "stream", "text": [ "\r", - "3-L6-D2048-E0_1-enw 81%[===============> ] 823.97M 52.1MB/s eta 4s " + "-D2048-E0_1-enwiki- 79%[==============> ] 808.71M 44.1MB/s eta 5s " ] }, { @@ -759,7 +797,7 @@ "output_type": "stream", "text": [ "\r", - "-L6-D2048-E0_1-enwi 82%[===============> ] 840.69M 53.9MB/s eta 4s " + "D2048-E0_1-enwiki-4 80%[===============> ] 823.46M 43.7MB/s eta 5s " ] }, { @@ -767,7 +805,7 @@ "output_type": "stream", "text": [ "\r", - "L6-D2048-E0_1-enwik 84%[===============> ] 854.49M 52.7MB/s eta 4s " + "2048-E0_1-enwiki-4k 82%[===============> ] 838.71M 46.8MB/s eta 4s " ] }, { @@ -775,7 +813,7 @@ "output_type": "stream", "text": [ "\r", - "6-D2048-E0_1-enwiki 85%[================> ] 869.75M 52.5MB/s eta 3s " + "048-E0_1-enwiki-4k. 83%[===============> ] 849.35M 45.0MB/s eta 4s " ] }, { @@ -783,7 +821,7 @@ "output_type": "stream", "text": [ "\r", - "-D2048-E0_1-enwiki- 87%[================> ] 885.01M 53.8MB/s eta 3s " + "48-E0_1-enwiki-4k.p 84%[===============> ] 854.49M 42.2MB/s eta 4s " ] }, { @@ -791,7 +829,7 @@ "output_type": "stream", "text": [ "\r", - "D2048-E0_1-enwiki-4 88%[================> ] 899.75M 53.7MB/s eta 3s " + "8-E0_1-enwiki-4k.pt 85%[================> ] 869.75M 43.5MB/s eta 4s " ] }, { @@ -799,7 +837,7 @@ "output_type": "stream", "text": [ "\r", - "2048-E0_1-enwiki-4k 89%[================> ] 906.85M 51.7MB/s eta 3s " + "-E0_1-enwiki-4k.pth 86%[================> ] 884.49M 45.5MB/s eta 4s " ] }, { @@ -807,7 +845,7 @@ "output_type": "stream", "text": [ "\r", - "048-E0_1-enwiki-4k. 90%[=================> ] 915.66M 53.2MB/s eta 2s " + "E0_1-enwiki-4k.pth 88%[================> ] 896.90M 47.7MB/s eta 3s " ] }, { @@ -815,7 +853,7 @@ "output_type": "stream", "text": [ "\r", - "48-E0_1-enwiki-4k.p 91%[=================> ] 930.27M 53.4MB/s eta 2s " + "0_1-enwiki-4k.pth 88%[================> ] 900.27M 46.5MB/s eta 3s " ] }, { @@ -823,7 +861,7 @@ "output_type": "stream", "text": [ "\r", - "8-E0_1-enwiki-4k.pt 92%[=================> ] 945.53M 54.0MB/s eta 2s " + "_1-enwiki-4k.pth 89%[================> ] 915.01M 48.9MB/s eta 3s " ] }, { @@ -831,7 +869,7 @@ "output_type": "stream", "text": [ "\r", - "-E0_1-enwiki-4k.pth 94%[=================> ] 960.79M 51.7MB/s eta 2s " + "1-enwiki-4k.pth 90%[=================> ] 918.15M 44.7MB/s eta 3s " ] }, { @@ -839,7 +877,7 @@ "output_type": "stream", "text": [ "\r", - "E0_1-enwiki-4k.pth 96%[==================> ] 976.55M 53.0MB/s eta 1s " + "-enwiki-4k.pth 91%[=================> ] 930.27M 47.6MB/s eta 2s " ] }, { @@ -847,7 +885,7 @@ "output_type": "stream", "text": [ "\r", - "0_1-enwiki-4k.pth 97%[==================> ] 986.81M 50.5MB/s eta 1s " + "enwiki-4k.pth 92%[=================> ] 945.53M 50.1MB/s eta 2s " ] }, { @@ -855,7 +893,7 @@ "output_type": "stream", "text": [ "\r", - "_1-enwiki-4k.pth 97%[==================> ] 991.82M 49.7MB/s eta 1s " + "nwiki-4k.pth 94%[=================> ] 960.79M 48.5MB/s eta 2s " ] }, { @@ -863,7 +901,7 @@ "output_type": "stream", "text": [ "\r", - "1-enwiki-4k.pth 99%[==================> ] 1007M 48.7MB/s eta 1s " + "wiki-4k.pth 96%[==================> ] 976.55M 49.3MB/s eta 2s " ] }, { @@ -871,9 +909,33 @@ "output_type": "stream", "text": [ "\r", - "v5r3-L6-D2048-E0_1- 100%[===================>] 1017M 50.0MB/s in 20s \r\n", + "iki-4k.pth 97%[==================> ] 991.82M 50.0MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ki-4k.pth 99%[==================> ] 1007M 49.7MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "i-4k.pth 99%[==================> ] 1016M 48.8MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5r3-L6-D2048-E0_1- 100%[===================>] 1017M 48.7MB/s in 21s \r\n", "\r\n", - "2023-09-06 17:17:31 (51.1 MB/s) - ‘v5r3-L6-D2048-E0_1-enwiki-4k.pth’ saved [1066536937/1066536937]\r\n", + "2023-09-06 17:25:12 (47.8 MB/s) - ‘v5r3-L6-D2048-E0_1-enwiki-4k.pth’ saved [1066536937/1066536937]\r\n", "\r\n" ] }, @@ -882,8 +944,8 @@ "output_type": "stream", "text": [ "total 1018M\r\n", - "drwxr-xr-x 2 root root 4.0K Sep 6 17:17 .\r\n", - "drwxr-xr-x 20 root root 4.0K Sep 6 17:17 ..\r\n", + "drwxr-xr-x 2 root root 4.0K Sep 6 17:24 .\r\n", + "drwxr-xr-x 20 root root 4.0K Sep 6 17:24 ..\r\n", "-rw-r--r-- 1 root root 1018M Sep 6 17:07 v5r3-L6-D2048-E0_1-enwiki-4k.pth\r\n" ] } @@ -900,13 +962,13 @@ { "attachments": {}, "cell_type": "markdown", - "id": "4e6c3e65", + "id": "a7640f2b", "metadata": { "papermill": { - "duration": 0.008152, - "end_time": "2023-09-06T17:17:31.910423", + "duration": 0.012199, + "end_time": "2023-09-06T17:25:12.831966", "exception": false, - "start_time": "2023-09-06T17:17:31.902271", + "start_time": "2023-09-06T17:25:12.819767", "status": "completed" }, "tags": [] @@ -918,19 +980,19 @@ { "cell_type": "code", "execution_count": 5, - "id": "b243d3f6", + "id": "dbfa1a63", "metadata": { "execution": { - "iopub.execute_input": "2023-09-06T17:17:31.929289Z", - "iopub.status.busy": "2023-09-06T17:17:31.928880Z", - "iopub.status.idle": "2023-09-06T17:17:44.337863Z", - "shell.execute_reply": "2023-09-06T17:17:44.336545Z" + "iopub.execute_input": "2023-09-06T17:25:12.852580Z", + "iopub.status.busy": "2023-09-06T17:25:12.851982Z", + "iopub.status.idle": "2023-09-06T17:25:20.359135Z", + "shell.execute_reply": "2023-09-06T17:25:20.357791Z" }, "papermill": { - "duration": 12.421926, - "end_time": "2023-09-06T17:17:44.340880", + "duration": 7.52148, + "end_time": "2023-09-06T17:25:20.362711", "exception": false, - "start_time": "2023-09-06T17:17:31.918954", + "start_time": "2023-09-06T17:25:12.841231", "status": "completed" }, "tags": [] @@ -941,8 +1003,7 @@ "output_type": "stream", "text": [ "\r", - "Downloading readme: 0%| | 0.00/7.79k [00:00=12.1), as this is known to have freeze issues\r\n", + "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", + "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", + "#\r\n", + "\r\n", + "[RWKV.model] Configuring optimizer with\r\n", + " - lr_init: 4.000e-04 (0.0004)\r\n", + " - lr_final: 3.000e-04 (0.0003)\r\n", + "\r\n", + "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\r", - "Map (num_proc=32): 64%|██████▍ | 9582/15015 [00:01<00:00, 5487.01 examples/s]" + "Detected CUDA files, patching ldflags\r\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", + "Building extension module fused_adam...\r\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\r", - "Map (num_proc=32): 86%|██████▊ | 12895/15015 [00:02<00:00, 11304.34 examples/s]" + "ninja: no work to do.\r\n", + "Loading extension module fused_adam...\r\n", + "Time to load fused_adam op: 0.08642840385437012 seconds\r\n", + "Loading `train_dataloader` to estimate number of stepping batches.\r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\r", - "Map (num_proc=32): 95%|████████▌| 14279/15015 [00:02<00:00, 9003.98 examples/s]" + "Rank: 0 partition count [1, 1] and sizes[(533245952, False), (384, False)] \r\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\r", - "Map (num_proc=32): 100%|█████████| 15015/15015 [00:02<00:00, 5557.21 examples/s]\r\n" + "\r\n", + " | Name | Type | Params\r\n", + "--------------------------------------\r\n", + "0 | emb | Embedding | 102 M \r\n", + "1 | blocks | ModuleList | 327 M \r\n", + "2 | ln_out | LayerNorm | 4.1 K \r\n", + "3 | head | Linear | 102 M \r\n", + "--------------------------------------\r\n", + "533 M Trainable params\r\n", + "0 Non-trainable params\r\n", + "533 M Total params\r\n", + "2,132.985 Total estimated model params size (MB)\r\n" ] }, { @@ -1123,7 +1248,9 @@ "output_type": "stream", "text": [ "\r", - "Filter (num_proc=32): 0%| | 0/15015 [00:00=12.1), as this is known to have freeze issues\r\n", - "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n", - "# - When resuming from checkpoint, the estimated time is inaccurate\r\n", - "#\r\n", - "\r\n", - "[RWKV.model] Configuring optimizer with\r\n", - " - lr_init: 4.000e-04 (0.0004)\r\n", - " - lr_final: 3.000e-04 (0.0003)\r\n", - "\r\n" + "\r", + "Epoch 0: 0%| | 21/14932 [00:15<3:09:18, 1.31it/s, v_num=z6kg, train/loss=3.38\r", + "Epoch 0: 0%| | 21/14932 [00:15<3:09:18, 1.31it/s, v_num=z6kg, train/loss=4.81" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Using /root/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...\r\n", - "Detected CUDA files, patching ldflags\r\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\r\n", - "Building extension module fused_adam...\r\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n" + "\r", + "Epoch 0: 0%| | 22/14932 [00:16<3:01:34, 1.37it/s, v_num=z6kg, train/loss=4.81\r", + "Epoch 0: 0%| | 22/14932 [00:16<3:01:34, 1.37it/s, v_num=z6kg, train/loss=6.38" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "ninja: no work to do.\r\n", - "Loading extension module fused_adam...\r\n", - "Time to load fused_adam op: 0.08282637596130371 seconds\r\n", - "Loading `train_dataloader` to estimate number of stepping batches.\r\n" + "\r", + "Epoch 0: 0%| | 23/14932 [00:16<2:54:30, 1.42it/s, v_num=z6kg, train/loss=6.38\r", + "Epoch 0: 0%| | 23/14932 [00:16<2:54:31, 1.42it/s, v_num=z6kg, train/loss=5.16" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Rank: 0 partition count [1, 1] and sizes[(533245952, False), (384, False)] \r\n" + "\r", + "Epoch 0: 0%| | 24/14932 [00:16<2:48:02, 1.48it/s, v_num=z6kg, train/loss=5.16\r", + "Epoch 0: 0%| | 24/14932 [00:16<2:48:03, 1.48it/s, v_num=z6kg, train/loss=4.78" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\r\n", - " | Name | Type | Params\r\n", - "--------------------------------------\r\n", - "0 | emb | Embedding | 102 M \r\n", - "1 | blocks | ModuleList | 327 M \r\n", - "2 | ln_out | LayerNorm | 4.1 K \r\n", - "3 | head | Linear | 102 M \r\n", - "--------------------------------------\r\n", - "533 M Trainable params\r\n", - "0 Non-trainable params\r\n", - "533 M Total params\r\n", - "2,132.985 Total estimated model params size (MB)\r\n" + "\r", + "Epoch 0: 0%| | 25/14932 [00:16<2:42:04, 1.53it/s, v_num=z6kg, train/loss=4.78\r", + "Epoch 0: 0%| | 25/14932 [00:16<2:42:05, 1.53it/s, v_num=z6kg, train/loss=6.03" ] }, { @@ -1406,9 +1483,8 @@ "output_type": "stream", "text": [ "\r", - "Training: 0it [00:00, ?it/s]\r", - "Training: 0%| | 0/14932 [00:00\r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 353, in __init__\r\n", + " self._run_subcommand(self.subcommand)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 642, in _run_subcommand\r\n", + " fn(**fn_kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 529, in fit\r\n", + " call._call_and_handle_interrupt(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 41, in _call_and_handle_interrupt\r\n", + " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/strategies/launchers/subprocess_script.py\", line 91, in launch\r\n", + " return function(*args, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 568, in _fit_impl\r\n", + " self._run(model, ckpt_path=ckpt_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/trainer.py\", line 925, in _run\r\n", + " self._data_connector.prepare_data()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py\", line 94, in prepare_data\r\n", + " call._call_lightning_datamodule_hook(trainer, \"prepare_data\")\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/call.py\", line 164, in _call_lightning_datamodule_hook\r\n", + " return fn(*args, **kwargs)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 542, in prepare_data\r\n", + " prepare_data_static(**self._init_locals)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/data.py\", line 101, in prepare_data_static\r\n", + " src_dataset = load_dataset(**load_dataset_params)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 2112, in load_dataset\r\n", + " builder_instance = load_dataset_builder(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1798, in load_dataset_builder\r\n", + " dataset_module = dataset_module_factory(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 1413, in dataset_module_factory\r\n", + " ).get_module()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/load.py\", line 948, in get_module\r\n", + " patterns = sanitize_patterns(self.data_files) if self.data_files is not None else get_data_patterns(base_path)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/datasets/data_files.py\", line 459, in get_data_patterns\r\n", + " raise EmptyDatasetError(f\"The directory at {base_path} doesn't contain any data files\") from None\r\n", + "datasets.data_files.EmptyDatasetError: The directory at /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/dataset doesn't contain any data files\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: - 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: / 0.005 MB of 0.005 MB uploaded (0.000 MB deduped)\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Instruct (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/xuck99wm\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v27\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_175222-xuck99wm/logs\u001b[0m\r\n" + ] + } + ], "source": [ "# Start the finetune model training\n", "!cd \"{TRAINER_DIR}\" && \\\n", @@ -6858,19 +141224,54 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "cb00a0de", + "execution_count": 12, + "id": "c93850c1", "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:39.516649Z", + "iopub.status.busy": "2023-09-06T17:52:39.515772Z", + "iopub.status.idle": "2023-09-06T17:52:43.445323Z", + "shell.execute_reply": "2023-09-06T17:52:43.444433Z" + }, "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 4.911758, + "end_time": "2023-09-06T17:52:43.448281", + "exception": false, + "start_time": "2023-09-06T17:52:38.536523", + "status": "completed" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:52:41,942] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-instruct/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-instruct.pth': No such file or directory\r\n" + ] + } + ], "source": [ "# Lets export the model from the checkpoint\n", "!cd \"{TRAINER_DIR}\" && \\\n", @@ -6882,19 +141283,33 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b23e0082", + "execution_count": 13, + "id": "9cbbdc1c", "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:45.335301Z", + "iopub.status.busy": "2023-09-06T17:52:45.334570Z", + "iopub.status.idle": "2023-09-06T17:52:45.599928Z", + "shell.execute_reply": "2023-09-06T17:52:45.598870Z" + }, "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 1.153922, + "end_time": "2023-09-06T17:52:45.602059", + "exception": false, + "start_time": "2023-09-06T17:52:44.448137", + "status": "completed" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], "source": [ "# Lets do a quick memory test\n", "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-instruct.pth\"" @@ -6902,14 +141317,14 @@ }, { "cell_type": "markdown", - "id": "a5b04004", + "id": "9759931b", "metadata": { "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 0.980039, + "end_time": "2023-09-06T17:52:47.551588", + "exception": false, + "start_time": "2023-09-06T17:52:46.571549", + "status": "completed" }, "tags": [] }, @@ -6921,19 +141336,264 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "13388b95", + "execution_count": 14, + "id": "e4b68f37", "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:49.396209Z", + "iopub.status.busy": "2023-09-06T17:52:49.395658Z", + "iopub.status.idle": "2023-09-06T17:52:49.450143Z", + "shell.execute_reply": "2023-09-06T17:52:49.449447Z" + }, "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 1.025106, + "end_time": "2023-09-06T17:52:49.452088", + "exception": false, + "start_time": "2023-09-06T17:52:48.426982", + "status": "completed" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Generating word reptition dataset ##\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/gen_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/shuffle_limited_prompt_completion_jsonl.py': [Errno 2] No such file or directory\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Done ##\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 8.0K\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 2 root root 4.0K Sep 6 17:52 .\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "drwxr-xr-x 6 root root 4.0K Sep 6 17:52 ..\n" + ] + } + ], "source": [ "%%script bash\n", "\n", @@ -6980,19 +141640,129 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b97a452e", + "execution_count": 15, + "id": "dfe7c26c", "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:52:51.295342Z", + "iopub.status.busy": "2023-09-06T17:52:51.294782Z", + "iopub.status.idle": "2023-09-06T17:53:11.646941Z", + "shell.execute_reply": "2023-09-06T17:53:11.645847Z" + }, "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 21.229481, + "end_time": "2023-09-06T17:53:11.649754", + "exception": false, + "start_time": "2023-09-06T17:52:50.420273", + "status": "completed" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:52:56,145] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-instruct.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-512/', '--model.lr_init=5e-4', '--model.lr_final=4e-4', '--data.max_token_size=512', '--model.ctx_len=512', '--model.bptt_learning_range=1', '--model.load_model=../model/v5r3-L6-D2048-E0_1-mem-instruct.pth'].\r\n", + " rank_zero_warn(\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 1381932438\r\n", + " rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n", + "Global seed set to 1381932438\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: - Waiting for wandb.init()...\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \\ Waiting for wandb.init()...\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230906_175300-6yfdmqhq\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/6yfdmqhq\u001b[0m\r\n", + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 254, in \r\n", + " cli_main()\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/lightning_trainer.py\", line 233, in cli_main\r\n", + " LightningCLI(\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 350, in __init__\r\n", + " self.instantiate_classes()\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py\", line 499, in instantiate_classes\r\n", + " self.config_init = self.parser.instantiate_classes(self.config)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1130, in instantiate_classes\r\n", + " cfg[subcommand] = subparser.instantiate_classes(cfg[subcommand], instantiate_groups=instantiate_groups)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_deprecated.py\", line 139, in patched_instantiate_classes\r\n", + " cfg = self._unpatched_instantiate_classes(cfg, **kwargs)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_core.py\", line 1124, in instantiate_classes\r\n", + " component.instantiate_class(component, cfg)\r\n", + " File \"/usr/local/lib/python3.10/dist-packages/jsonargparse/_signatures.py\", line 561, in group_instantiate_class\r\n", + " parent[key] = group.group_class(**value)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/src/model.py\", line 559, in __init__\r\n", + " raise ValueError(f\"load_model file '{load_model}' does not exist\")\r\n", + "ValueError: load_model file '../model/v5r3-L6-D2048-E0_1-mem-instruct.pth' does not exist\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[31m(failed 1).\u001b[0m Press Control-C to abort syncing.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5r3-L6-D2048-E0.1 - Mem-Tune ctx-512 (train-ctx=512, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/6yfdmqhq\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v28\u001b[0m\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230906_175300-6yfdmqhq/logs\u001b[0m\r\n" + ] + } + ], "source": [ "# Start the finetune model training\n", "!cd \"{TRAINER_DIR}\" && \\\n", @@ -7013,19 +141783,54 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "28c68850", + "execution_count": 16, + "id": "a64f121e", "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:53:13.678266Z", + "iopub.status.busy": "2023-09-06T17:53:13.677894Z", + "iopub.status.idle": "2023-09-06T17:53:17.614786Z", + "shell.execute_reply": "2023-09-06T17:53:17.613856Z" + }, "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 4.956429, + "end_time": "2023-09-06T17:53:17.617091", + "exception": false, + "start_time": "2023-09-06T17:53:12.660662", + "status": "completed" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-09-06 17:53:16,116] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 651, in \r\n", + " convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file, save_dtype=args.dtype)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 542, in convert_zero_checkpoint_to_fp32_state_dict\r\n", + " state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)\r\n", + " File \"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5/export_checkpoint.py\", line 516, in get_fp32_state_dict_from_zero_checkpoint\r\n", + " raise ValueError(f\"Unable to find 'latest' file at {latest_path}\")\r\n", + "ValueError: Unable to find 'latest' file at ../checkpoint/v5r3-L6-D2048-E0_1-mem-ctx-512/last.ckpt/latest\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ls: cannot access '../model/v5r3-L6-D2048-E0_1-mem-ctx-512.pth': No such file or directory\r\n" + ] + } + ], "source": [ "# Lets export the model from the checkpoint\n", "!cd \"{TRAINER_DIR}\" && \\\n", @@ -7037,19 +141842,33 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "21791457", + "execution_count": 17, + "id": "7b3383b0", "metadata": { + "execution": { + "iopub.execute_input": "2023-09-06T17:53:19.466424Z", + "iopub.status.busy": "2023-09-06T17:53:19.465359Z", + "iopub.status.idle": "2023-09-06T17:53:19.731128Z", + "shell.execute_reply": "2023-09-06T17:53:19.730200Z" + }, "papermill": { - "duration": null, - "end_time": null, - "exception": null, - "start_time": null, - "status": "pending" + "duration": 1.231112, + "end_time": "2023-09-06T17:53:19.733404", + "exception": false, + "start_time": "2023-09-06T17:53:18.502292", + "status": "completed" }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory\r\n" + ] + } + ], "source": [ "# Lets do a quick memory test\n", "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-512.pth\"" @@ -7072,18 +141891,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.12" }, "papermill": { "default_parameters": {}, - "duration": null, - "end_time": null, + "duration": 1715.908096, + "end_time": "2023-09-06T17:53:21.033393", "environment_variables": {}, "exception": null, "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb", "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/part2.ipynb", "parameters": {}, - "start_time": "2023-09-06T17:17:05.666978", + "start_time": "2023-09-06T17:24:45.125297", "version": "2.4.0" } },