diff --git "a/experiment/memory-bench/Benchmark-V5headsize2x.ipynb" "b/experiment/memory-bench/Benchmark-V5headsize2x.ipynb" new file mode 100644--- /dev/null +++ "b/experiment/memory-bench/Benchmark-V5headsize2x.ipynb" @@ -0,0 +1,4198 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "71c4c32a", + "metadata": { + "papermill": { + "duration": 0.002417, + "end_time": "2023-08-25T18:13:03.824878", + "exception": false, + "start_time": "2023-08-25T18:13:03.822461", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Custom Experimental Memory benchmarking\n", + "\n", + "The following is meant to observe the memory performance in a more verbose CSV logging mode, for the baseline raven models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fe270b5b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-25T18:13:03.831018Z", + "iopub.status.busy": "2023-08-25T18:13:03.830299Z", + "iopub.status.idle": "2023-08-25T18:14:12.483726Z", + "shell.execute_reply": "2023-08-25T18:14:12.482668Z" + }, + "papermill": { + "duration": 68.65868, + "end_time": "2023-08-25T18:14:12.485830", + "exception": false, + "start_time": "2023-08-25T18:13:03.827150", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-08-25 18:13:04-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-headsize2x/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth\r\n", + "Resolving huggingface.co (huggingface.co)... 99.84.108.70, 99.84.108.87, 99.84.108.55, ...\r\n", + "Connecting to huggingface.co (huggingface.co)|99.84.108.70|:443... connected.\r\n", + "HTTP request sent, awaiting response... 302 Found\r\n", + "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fae9799e56fd469c6c1694dae6ae77f78130c06d340e945a3dc8e8ea2bd4e5bb?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth%3B+filename%3D%22v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth%22%3B&Expires=1693246384&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MzI0NjM4NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhZTk3OTllNTZmZDQ2OWM2YzE2OTRkYWU2YWU3N2Y3ODEzMGMwNmQzNDBlOTQ1YTNkYzhlOGVhMmJkNGU1YmI%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=olQ8mut41rgo9BvutZmMf1PeLkSaO6PhZIzWLH4uXhJkOjZW2KPyyWAEUK1KgjPusfG7YqYbKidI2%7Efik2Nl0XSeH5ekJa8PUrMXUy5tLW2uj9h22RWGHMcTW9kF-uBoul6PBNEF6gzSk%7EB9XeS9Yv6Oq86rojg0td0fABf9Krm50-ZRRdaH9-mpV70uC1PIqtHnjBvRs6oLzmZXC6FLdiT3tF8eQ6tEqeF0ls8yneFcUEBhrggaCGUKgnr1TH60IQ4YYP3PrzuCXsrpag-kyRc1%7EUfPipr-CupOkFHyS7sscluBLcp67ZopVx8iLG%7ETCKZUIlXsa%7EqTcrcyRvm4Ag__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n", + "--2023-08-25 18:13:04-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/fae9799e56fd469c6c1694dae6ae77f78130c06d340e945a3dc8e8ea2bd4e5bb?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth%3B+filename%3D%22v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth%22%3B&Expires=1693246384&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MzI0NjM4NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkL2ZhZTk3OTllNTZmZDQ2OWM2YzE2OTRkYWU2YWU3N2Y3ODEzMGMwNmQzNDBlOTQ1YTNkYzhlOGVhMmJkNGU1YmI%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=olQ8mut41rgo9BvutZmMf1PeLkSaO6PhZIzWLH4uXhJkOjZW2KPyyWAEUK1KgjPusfG7YqYbKidI2%7Efik2Nl0XSeH5ekJa8PUrMXUy5tLW2uj9h22RWGHMcTW9kF-uBoul6PBNEF6gzSk%7EB9XeS9Yv6Oq86rojg0td0fABf9Krm50-ZRRdaH9-mpV70uC1PIqtHnjBvRs6oLzmZXC6FLdiT3tF8eQ6tEqeF0ls8yneFcUEBhrggaCGUKgnr1TH60IQ4YYP3PrzuCXsrpag-kyRc1%7EUfPipr-CupOkFHyS7sscluBLcp67ZopVx8iLG%7ETCKZUIlXsa%7EqTcrcyRvm4Ag__&Key-Pair-Id=KVTP0A1DKRTAX\r\n", + "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 108.138.64.36, 108.138.64.49, 108.138.64.121, ...\r\n", + "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.138.64.36|:443... connected.\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HTTP request sent, awaiting response... " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "200 OK\r\n", + "Length: 3441599109 (3.2G) [binary/octet-stream]\r\n", + "Saving to: ‘v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth’\r\n", + "\r\n", + "\r", + " v5-hs2x-L 0%[ ] 0 --.-KB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6 0%[ ] 15.26M 43.8MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6- 0%[ ] 30.52M 47.6MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D 1%[ ] 52.47M 60.3MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4 1%[ ] 61.03M 56.3MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D40 2%[ ] 75.78M 54.8MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D409 2%[ ] 91.55M 50.5MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096 3%[ ] 106.81M 50.3MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096- 3%[ ] 121.32M 52.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096-E 3%[ ] 122.07M 48.2MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5-hs2x-L6-D4096-E0 4%[ ] 136.81M 48.4MB/s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5-hs2x-L6-D4096-E0_ 4%[ ] 150.76M 46.7MB/s eta 67s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-hs2x-L6-D4096-E0_1 5%[> ] 167.33M 47.8MB/s eta 67s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "hs2x-L6-D4096-E0_1- 5%[> ] 182.59M 48.2MB/s eta 67s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "s2x-L6-D4096-E0_1-m 6%[> ] 197.34M 49.5MB/s eta 67s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2x-L6-D4096-E0_1-me 6%[> ] 197.85M 43.4MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-L6-D4096-E0_1-mem 6%[> ] 206.05M 43.3MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D4096-E0_1-mem- 6%[> ] 213.62M 40.9MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D4096-E0_1-mem-c 6%[> ] 228.36M 43.3MB/s eta 71s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D4096-E0_1-mem-ct 7%[> ] 243.63M 42.2MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D4096-E0_1-mem-ctx 7%[> ] 251.29M 40.3MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D4096-E0_1-mem-ctx- 7%[> ] 262.02M 41.2MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4096-E0_1-mem-ctx-8 8%[> ] 274.66M 38.9MB/s eta 69s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "096-E0_1-mem-ctx-8k 8%[> ] 289.92M 41.6MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "96-E0_1-mem-ctx-8k. 9%[> ] 305.18M 42.4MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-E0_1-mem-ctx-8k.p 9%[> ] 323.72M 43.4MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-8k.pt 10%[=> ] 335.69M 45.0MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-8k.pth 10%[=> ] 350.81M 45.9MB/s eta 68s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-8k.pth 10%[=> ] 357.26M 45.3MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-8k.pth 11%[=> ] 366.20M 46.3MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-8k.pth 11%[=> ] 382.15M 47.1MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-8k.pth 11%[=> ] 389.90M 46.1MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-8k.pth 12%[=> ] 407.27M 51.5MB/s eta 64s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-8k.pth 12%[=> ] 411.99M 51.7MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-8k.pth 13%[=> ] 427.25M 50.9MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-8k.pth 13%[=> ] 442.68M 53.5MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-8k.pth 13%[=> ] 457.76M 53.8MB/s eta 62s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-8k.pth 14%[=> ] 473.02M 52.7MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-8k.pth 15%[==> ] 492.96M 55.9MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-8k.pth 15%[==> ] 503.54M 55.6MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8k.pth 15%[==> ] 518.80M 56.3MB/s eta 59s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 16%[==> ] 534.05M 55.2MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 16%[==> ] 549.31M 53.4MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 17%[==> ] 565.78M 53.8MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 17%[==> ] 579.83M 55.4MB/s eta 56s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 18%[==> ] 595.09M 56.1MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 18%[==> ] 608.53M 55.9MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 18%[==> ] 610.35M 51.6MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 19%[==> ] 625.61M 51.7MB/s eta 54s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5- 19%[==> ] 640.87M 52.1MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-h 19%[==> ] 656.13M 51.5MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs 20%[===> ] 667.89M 50.8MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2 20%[===> ] 684.82M 50.9MB/s eta 55s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x 21%[===> ] 701.39M 49.9MB/s eta 53s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x- 21%[===> ] 716.64M 48.9MB/s eta 53s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L 22%[===> ] 726.10M 49.0MB/s eta 53s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6 22%[===> ] 732.42M 47.0MB/s eta 53s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6- 22%[===> ] 747.69M 45.9MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D 23%[===> ] 762.42M 47.5MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4 23%[===> ] 777.68M 47.9MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D40 24%[===> ] 792.94M 48.2MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D409 24%[===> ] 808.20M 52.3MB/s eta 52s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096 25%[====> ] 823.97M 51.4MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096- 25%[====> ] 839.23M 54.1MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096-E 26%[====> ] 854.36M 54.1MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5-hs2x-L6-D4096-E0 26%[====> ] 858.39M 51.8MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5-hs2x-L6-D4096-E0_ 26%[====> ] 869.75M 53.9MB/s eta 50s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-hs2x-L6-D4096-E0_1 26%[====> ] 884.49M 53.8MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "hs2x-L6-D4096-E0_1- 27%[====> ] 891.62M 53.5MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "s2x-L6-D4096-E0_1-m 27%[====> ] 900.27M 52.3MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2x-L6-D4096-E0_1-me 27%[====> ] 915.53M 52.9MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-L6-D4096-E0_1-mem 28%[====> ] 930.27M 55.8MB/s eta 48s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D4096-E0_1-mem- 28%[====> ] 942.91M 54.9MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D4096-E0_1-mem-c 28%[====> ] 946.04M 55.0MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D4096-E0_1-mem-ct 29%[====> ] 961.30M 54.0MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D4096-E0_1-mem-ctx 29%[====> ] 976.55M 54.7MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D4096-E0_1-mem-ctx- 30%[=====> ] 991.82M 55.6MB/s eta 47s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4096-E0_1-mem-ctx-8 30%[=====> ] 1005M 56.8MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "096-E0_1-mem-ctx-8k 30%[=====> ] 1008M 54.3MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "96-E0_1-mem-ctx-8k. 31%[=====> ] 1022M 53.0MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-E0_1-mem-ctx-8k.p 31%[=====> ] 1.01G 51.1MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-8k.pt 31%[=====> ] 1.01G 50.0MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-8k.pth 31%[=====> ] 1.02G 49.1MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-8k.pth 32%[=====> ] 1.03G 46.6MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-8k.pth 32%[=====> ] 1.04G 45.9MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-8k.pth 32%[=====> ] 1.04G 43.6MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-8k.pth 32%[=====> ] 1.05G 45.4MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-8k.pth 33%[=====> ] 1.06G 42.9MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-8k.pth 33%[=====> ] 1.07G 41.4MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-8k.pth 33%[=====> ] 1.09G 41.8MB/s eta 45s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-8k.pth 34%[=====> ] 1.10G 43.9MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-8k.pth 34%[=====> ] 1.12G 43.9MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-8k.pth 35%[======> ] 1.13G 39.5MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-8k.pth 35%[======> ] 1.13G 36.3MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-8k.pth 35%[======> ] 1.15G 38.6MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8k.pth 35%[======> ] 1.15G 36.6MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 36%[======> ] 1.16G 38.3MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 36%[======> ] 1.18G 40.2MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 37%[======> ] 1.19G 40.1MB/s eta 44s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 37%[======> ] 1.19G 40.0MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 37%[======> ] 1.21G 40.6MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 38%[======> ] 1.22G 44.5MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 38%[======> ] 1.23G 42.9MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 38%[======> ] 1.24G 43.0MB/s eta 43s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5- 39%[======> ] 1.25G 45.6MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-h 39%[======> ] 1.25G 42.6MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs 39%[======> ] 1.27G 40.6MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2 39%[======> ] 1.27G 38.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x 39%[======> ] 1.28G 36.9MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x- 40%[=======> ] 1.30G 39.3MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L 40%[=======> ] 1.31G 43.6MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6 41%[=======> ] 1.33G 42.8MB/s eta 42s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6- 41%[=======> ] 1.34G 42.6MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D 41%[=======> ] 1.34G 40.0MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4 42%[=======> ] 1.36G 41.6MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D40 42%[=======> ] 1.37G 42.2MB/s eta 41s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D409 43%[=======> ] 1.39G 41.5MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096 43%[=======> ] 1.40G 41.5MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096- 43%[=======> ] 1.40G 41.0MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096-E 44%[=======> ] 1.42G 40.3MB/s eta 40s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5-hs2x-L6-D4096-E0 44%[=======> ] 1.43G 42.8MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5-hs2x-L6-D4096-E0_ 45%[========> ] 1.45G 45.0MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-hs2x-L6-D4096-E0_1 45%[========> ] 1.46G 45.3MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "hs2x-L6-D4096-E0_1- 46%[========> ] 1.47G 46.7MB/s eta 39s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "s2x-L6-D4096-E0_1-m 46%[========> ] 1.49G 44.4MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2x-L6-D4096-E0_1-me 46%[========> ] 1.50G 46.1MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-L6-D4096-E0_1-mem 47%[========> ] 1.52G 46.6MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D4096-E0_1-mem- 47%[========> ] 1.53G 49.2MB/s eta 38s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D4096-E0_1-mem-c 48%[========> ] 1.55G 49.1MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D4096-E0_1-mem-ct 48%[========> ] 1.56G 49.9MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D4096-E0_1-mem-ctx 49%[========> ] 1.58G 49.5MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D4096-E0_1-mem-ctx- 49%[========> ] 1.59G 49.7MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4096-E0_1-mem-ctx-8 49%[========> ] 1.59G 49.7MB/s eta 36s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "096-E0_1-mem-ctx-8k 50%[=========> ] 1.61G 53.3MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "96-E0_1-mem-ctx-8k. 50%[=========> ] 1.62G 52.2MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-E0_1-mem-ctx-8k.p 50%[=========> ] 1.62G 49.0MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-8k.pt 51%[=========> ] 1.64G 51.8MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-8k.pth 51%[=========> ] 1.65G 51.4MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-8k.pth 51%[=========> ] 1.65G 49.9MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-8k.pth 51%[=========> ] 1.66G 48.7MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-8k.pth 52%[=========> ] 1.67G 47.6MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-8k.pth 52%[=========> ] 1.68G 47.6MB/s eta 34s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-8k.pth 52%[=========> ] 1.68G 45.6MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-8k.pth 52%[=========> ] 1.70G 45.9MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-8k.pth 53%[=========> ] 1.71G 44.9MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-8k.pth 53%[=========> ] 1.72G 46.1MB/s eta 33s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-8k.pth 54%[=========> ] 1.74G 45.7MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-8k.pth 54%[=========> ] 1.76G 48.4MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-8k.pth 55%[==========> ] 1.77G 47.5MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-8k.pth 55%[==========> ] 1.78G 46.3MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8k.pth 55%[==========> ] 1.79G 45.7MB/s eta 32s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 56%[==========> ] 1.80G 45.4MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 56%[==========> ] 1.82G 47.0MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 56%[==========> ] 1.82G 46.8MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 57%[==========> ] 1.83G 45.9MB/s eta 30s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 57%[==========> ] 1.85G 47.5MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 58%[==========> ] 1.86G 49.6MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 58%[==========> ] 1.88G 49.8MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 58%[==========> ] 1.89G 52.4MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5- 59%[==========> ] 1.89G 49.7MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-h 59%[==========> ] 1.91G 50.1MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs 59%[==========> ] 1.92G 49.4MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2 60%[===========> ] 1.94G 46.8MB/s eta 29s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x 60%[===========> ] 1.95G 46.2MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x- 61%[===========> ] 1.97G 49.2MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L 61%[===========> ] 1.98G 49.0MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6 62%[===========> ] 2.00G 47.4MB/s eta 27s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6- 62%[===========> ] 2.01G 47.4MB/s eta 26s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D 63%[===========> ] 2.03G 50.2MB/s eta 26s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4 63%[===========> ] 2.04G 52.1MB/s eta 26s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D40 63%[===========> ] 2.04G 48.2MB/s eta 26s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D409 64%[===========> ] 2.06G 48.0MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096 65%[============> ] 2.08G 52.1MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096- 65%[============> ] 2.09G 50.3MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096-E 65%[============> ] 2.09G 50.6MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5-hs2x-L6-D4096-E0 65%[============> ] 2.12G 51.2MB/s eta 25s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5-hs2x-L6-D4096-E0_ 66%[============> ] 2.13G 54.5MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-hs2x-L6-D4096-E0_1 66%[============> ] 2.14G 52.0MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "hs2x-L6-D4096-E0_1- 67%[============> ] 2.16G 47.3MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "s2x-L6-D4096-E0_1-m 67%[============> ] 2.17G 47.3MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2x-L6-D4096-E0_1-me 67%[============> ] 2.17G 45.1MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-L6-D4096-E0_1-mem 68%[============> ] 2.19G 45.3MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D4096-E0_1-mem- 68%[============> ] 2.21G 44.4MB/s eta 23s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D4096-E0_1-mem-c 69%[============> ] 2.22G 46.5MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D4096-E0_1-mem-ct 69%[============> ] 2.23G 46.8MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D4096-E0_1-mem-ctx 70%[=============> ] 2.25G 46.6MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D4096-E0_1-mem-ctx- 70%[=============> ] 2.26G 49.4MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4096-E0_1-mem-ctx-8 70%[=============> ] 2.27G 47.9MB/s eta 21s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "096-E0_1-mem-ctx-8k 71%[=============> ] 2.29G 50.5MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "96-E0_1-mem-ctx-8k. 71%[=============> ] 2.29G 48.2MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-E0_1-mem-ctx-8k.p 72%[=============> ] 2.31G 47.9MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-8k.pt 72%[=============> ] 2.32G 49.1MB/s eta 20s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-8k.pth 72%[=============> ] 2.34G 48.2MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-8k.pth 73%[=============> ] 2.35G 47.7MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-8k.pth 73%[=============> ] 2.37G 54.0MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-8k.pth 73%[=============> ] 2.37G 52.6MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-8k.pth 74%[=============> ] 2.38G 52.6MB/s eta 19s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-8k.pth 75%[==============> ] 2.41G 56.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-8k.pth 75%[==============> ] 2.42G 56.0MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-8k.pth 75%[==============> ] 2.43G 52.6MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-8k.pth 75%[==============> ] 2.43G 53.3MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-8k.pth 76%[==============> ] 2.44G 51.4MB/s eta 17s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-8k.pth 77%[==============> ] 2.47G 55.4MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-8k.pth 77%[==============> ] 2.49G 55.4MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-8k.pth 77%[==============> ] 2.49G 52.9MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8k.pth 78%[==============> ] 2.50G 54.4MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 78%[==============> ] 2.50G 52.2MB/s eta 16s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 78%[==============> ] 2.52G 52.0MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "pth 79%[==============> ] 2.53G 51.8MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "th 79%[==============> ] 2.55G 51.8MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "h 79%[==============> ] 2.56G 51.4MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " 80%[===============> ] 2.57G 51.6MB/s eta 15s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v 80%[===============> ] 2.59G 55.8MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5 81%[===============> ] 2.61G 55.7MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5- 81%[===============> ] 2.62G 50.9MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-h 81%[===============> ] 2.62G 52.0MB/s eta 13s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs 82%[===============> ] 2.65G 54.2MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2 83%[===============> ] 2.67G 56.3MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x 83%[===============> ] 2.67G 54.9MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x- 83%[===============> ] 2.68G 56.5MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L 84%[===============> ] 2.70G 55.0MB/s eta 12s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6 84%[===============> ] 2.71G 54.5MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6- 84%[===============> ] 2.71G 53.3MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D 85%[================> ] 2.73G 54.8MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4 85%[================> ] 2.74G 54.8MB/s eta 11s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D40 86%[================> ] 2.76G 51.6MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D409 86%[================> ] 2.77G 53.2MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096 86%[================> ] 2.77G 50.1MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096- 86%[================> ] 2.79G 48.3MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + " v5-hs2x-L6-D4096-E 87%[================> ] 2.80G 49.4MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "v5-hs2x-L6-D4096-E0 87%[================> ] 2.82G 46.2MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "5-hs2x-L6-D4096-E0_ 88%[================> ] 2.83G 45.8MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-hs2x-L6-D4096-E0_1 88%[================> ] 2.85G 47.6MB/s eta 9s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "hs2x-L6-D4096-E0_1- 89%[================> ] 2.86G 46.2MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "s2x-L6-D4096-E0_1-m 89%[================> ] 2.88G 48.9MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "2x-L6-D4096-E0_1-me 90%[=================> ] 2.89G 44.3MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-L6-D4096-E0_1-mem 90%[=================> ] 2.89G 42.7MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-L6-D4096-E0_1-mem- 90%[=================> ] 2.91G 42.9MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "L6-D4096-E0_1-mem-c 91%[=================> ] 2.92G 43.5MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-D4096-E0_1-mem-ct 91%[=================> ] 2.93G 43.7MB/s eta 7s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-D4096-E0_1-mem-ctx 92%[=================> ] 2.95G 46.7MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "D4096-E0_1-mem-ctx- 92%[=================> ] 2.97G 50.0MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "4096-E0_1-mem-ctx-8 93%[=================> ] 2.98G 52.4MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "096-E0_1-mem-ctx-8k 93%[=================> ] 2.99G 50.7MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "96-E0_1-mem-ctx-8k. 93%[=================> ] 3.01G 50.3MB/s eta 5s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "6-E0_1-mem-ctx-8k.p 93%[=================> ] 3.01G 50.1MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-E0_1-mem-ctx-8k.pt 94%[=================> ] 3.02G 51.6MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "E0_1-mem-ctx-8k.pth 95%[==================> ] 3.05G 52.7MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "0_1-mem-ctx-8k.pth 95%[==================> ] 3.05G 49.5MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "_1-mem-ctx-8k.pth 95%[==================> ] 3.06G 48.1MB/s eta 4s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "1-mem-ctx-8k.pth 95%[==================> ] 3.07G 48.9MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-mem-ctx-8k.pth 96%[==================> ] 3.08G 51.5MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "mem-ctx-8k.pth 96%[==================> ] 3.09G 51.4MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "em-ctx-8k.pth 96%[==================> ] 3.10G 49.1MB/s eta 3s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "m-ctx-8k.pth 97%[==================> ] 3.11G 51.1MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-ctx-8k.pth 97%[==================> ] 3.13G 49.6MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "ctx-8k.pth 98%[==================> ] 3.14G 49.6MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "tx-8k.pth 98%[==================> ] 3.14G 48.5MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "x-8k.pth 98%[==================> ] 3.16G 46.8MB/s eta 2s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "-8k.pth 98%[==================> ] 3.17G 47.2MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "8k.pth 99%[==================> ] 3.18G 47.1MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "k.pth 99%[==================> ] 3.19G 46.1MB/s eta 1s " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + ".pth 99%[==================> ] 3.20G 49.3MB/s eta 1s \r", + "v5-hs2x-L6-D4096-E0 100%[===================>] 3.21G 49.6MB/s in 68s \r\n", + "\r\n", + "2023-08-25 18:14:12 (48.3 MB/s) - ‘v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth’ saved [3441599109/3441599109]\r\n", + "\r\n" + ] + } + ], + "source": [ + "# Lets download the custom models\n", + "!mkdir -p ../../../model/\n", + "!cd ../../../model/ && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-headsize2x/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f0d4b8a8", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-25T18:14:12.521024Z", + "iopub.status.busy": "2023-08-25T18:14:12.520099Z", + "iopub.status.idle": "2023-08-25T18:14:12.772300Z", + "shell.execute_reply": "2023-08-25T18:14:12.771216Z" + }, + "papermill": { + "duration": 0.272197, + "end_time": "2023-08-25T18:14:12.774408", + "exception": false, + "start_time": "2023-08-25T18:14:12.502211", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 3.3G\r\n", + "-rw-r--r-- 1 root root 3.3G Aug 25 07:41 v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth\r\n" + ] + } + ], + "source": [ + "!ls -lh ../../../model/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9117b2b1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-25T18:14:12.809567Z", + "iopub.status.busy": "2023-08-25T18:14:12.808669Z", + "iopub.status.idle": "2023-08-25T18:14:13.062795Z", + "shell.execute_reply": "2023-08-25T18:14:13.061717Z" + }, + "papermill": { + "duration": 0.274291, + "end_time": "2023-08-25T18:14:13.064890", + "exception": false, + "start_time": "2023-08-25T18:14:12.790599", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/usr/bin/sh: 1: python: not found\r\n" + ] + } + ], + "source": [ + "!python -m pip install aiocsv aiofiles" + ] + }, + { + "cell_type": "markdown", + "id": "fd51d3a7", + "metadata": { + "papermill": { + "duration": 0.016134, + "end_time": "2023-08-25T18:14:13.097474", + "exception": false, + "start_time": "2023-08-25T18:14:13.081340", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# 1B5 V5 benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9057d55b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-25T18:14:13.132779Z", + "iopub.status.busy": "2023-08-25T18:14:13.131818Z", + "iopub.status.idle": "2023-08-25T18:15:27.785714Z", + "shell.execute_reply": "2023-08-25T18:15:27.784647Z" + }, + "papermill": { + "duration": 74.674403, + "end_time": "2023-08-25T18:15:27.788104", + "exception": false, + "start_time": "2023-08-25T18:14:13.113701", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-08-25 18:14:17,673] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "###\r\n", + "### Model validation start ###\r\n", + "###\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5 tokens : 100.0% similarity, with 5 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10 tokens : 100.0% similarity, with 10 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15 tokens : 100.0% similarity, with 15 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 20 tokens : 100.0% similarity, with 20 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 25 tokens : 100.0% similarity, with 25 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 30 tokens : 100.0% similarity, with 30 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 35 tokens : 100.0% similarity, with 35 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 40 tokens : 100.0% similarity, with 40 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 45 tokens : 100.0% similarity, with 45 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 50 tokens : 100.0% similarity, with 50 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 55 tokens : 100.0% similarity, with 55 matched token, and 0 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 60 tokens : 98.33333333333333% similarity, with 59 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 65 tokens : 98.46153846153847% similarity, with 64 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 70 tokens : 98.57142857142858% similarity, with 69 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 75 tokens : 98.66666666666667% similarity, with 74 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 80 tokens : 96.25% similarity, with 77 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 85 tokens : 96.47058823529412% similarity, with 82 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 90 tokens : 96.66666666666667% similarity, with 87 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 95 tokens : 97.89473684210527% similarity, with 93 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 100 tokens : 99.0% similarity, with 99 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 105 tokens : 99.04761904761905% similarity, with 104 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 110 tokens : 98.18181818181819% similarity, with 108 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 115 tokens : 98.26086956521739% similarity, with 113 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 120 tokens : 98.33333333333333% similarity, with 118 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 125 tokens : 99.2% similarity, with 124 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 130 tokens : 99.23076923076923% similarity, with 129 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 135 tokens : 98.51851851851852% similarity, with 133 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 140 tokens : 98.57142857142858% similarity, with 138 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 145 tokens : 98.62068965517241% similarity, with 143 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 150 tokens : 98.66666666666667% similarity, with 148 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 160 tokens : 98.125% similarity, with 157 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 170 tokens : 97.6470588235294% similarity, with 166 matched token, and 4 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 180 tokens : 97.77777777777777% similarity, with 176 matched token, and 4 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 190 tokens : 98.42105263157895% similarity, with 187 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 200 tokens : 98.5% similarity, with 197 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 210 tokens : 99.04761904761905% similarity, with 208 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 220 tokens : 98.63636363636363% similarity, with 217 matched token, and 3 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 230 tokens : 99.1304347826087% similarity, with 228 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 240 tokens : 99.58333333333333% similarity, with 239 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 250 tokens : 99.6% similarity, with 249 matched token, and 1 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 260 tokens : 99.23076923076923% similarity, with 258 matched token, and 2 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 270 tokens : 98.14814814814815% similarity, with 265 matched token, and 5 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 280 tokens : 98.21428571428571% similarity, with 275 matched token, and 5 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 290 tokens : 97.93103448275862% similarity, with 284 matched token, and 6 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 300 tokens : 98.0% similarity, with 294 matched token, and 6 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 325 tokens : 98.15384615384616% similarity, with 319 matched token, and 6 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 350 tokens : 97.42857142857143% similarity, with 341 matched token, and 9 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 375 tokens : 96.0% similarity, with 360 matched token, and 15 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 400 tokens : 96.25% similarity, with 385 matched token, and 15 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 425 tokens : 95.52941176470588% similarity, with 406 matched token, and 19 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 450 tokens : 95.77777777777777% similarity, with 431 matched token, and 19 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 475 tokens : 95.36842105263158% similarity, with 453 matched token, and 22 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 500 tokens : 95.19999999999999% similarity, with 476 matched token, and 24 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 525 tokens : 94.47619047619048% similarity, with 496 matched token, and 29 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 550 tokens : 94.72727272727272% similarity, with 521 matched token, and 29 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 575 tokens : 94.78260869565217% similarity, with 545 matched token, and 30 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 600 tokens : 94.83333333333334% similarity, with 569 matched token, and 31 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 625 tokens : 94.88% similarity, with 593 matched token, and 32 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 650 tokens : 94.0% similarity, with 611 matched token, and 39 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 675 tokens : 94.37037037037037% similarity, with 637 matched token, and 38 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 700 tokens : 93.28571428571428% similarity, with 653 matched token, and 47 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 750 tokens : 93.2% similarity, with 699 matched token, and 51 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 800 tokens : 91.25% similarity, with 730 matched token, and 70 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 850 tokens : 90.23529411764706% similarity, with 767 matched token, and 83 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 900 tokens : 90.0% similarity, with 810 matched token, and 90 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 950 tokens : 89.57894736842105% similarity, with 851 matched token, and 99 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1000 tokens : 88.5% similarity, with 885 matched token, and 115 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Finished baseline model to eval output predictive matching (aka 0 memory?), for 1000 tokens\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "###\r\n", + "### Model validation end ###\r\n", + "###\r\n" + ] + } + ], + "source": [ + "!python3 ./memory_script/eval_v5headsize2x_memory_guided.py \"../../../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth\" \"./logs/v5-hs2x-L6-D4096-E0_1-1k.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "20631917", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-25T18:15:27.831092Z", + "iopub.status.busy": "2023-08-25T18:15:27.830126Z", + "iopub.status.idle": "2023-08-25T18:19:42.015564Z", + "shell.execute_reply": "2023-08-25T18:19:42.014647Z" + }, + "papermill": { + "duration": 254.209987, + "end_time": "2023-08-25T18:19:42.018034", + "exception": false, + "start_time": "2023-08-25T18:15:27.808047", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-08-25 18:15:32,415] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "###\r\n", + "### Model validation start ###\r\n", + "###\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1100 tokens : 86.18181818181819% similarity, with 948 matched token, and 152 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1150 tokens : 85.04347826086956% similarity, with 978 matched token, and 172 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1200 tokens : 84.66666666666667% similarity, with 1016 matched token, and 184 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1250 tokens : 84.24000000000001% similarity, with 1053 matched token, and 197 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1300 tokens : 83.07692307692308% similarity, with 1080 matched token, and 220 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1350 tokens : 82.2962962962963% similarity, with 1111 matched token, and 239 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1400 tokens : 81.28571428571428% similarity, with 1138 matched token, and 262 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1450 tokens : 80.55172413793103% similarity, with 1168 matched token, and 282 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1500 tokens : 79.86666666666666% similarity, with 1198 matched token, and 302 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1550 tokens : 79.16129032258064% similarity, with 1227 matched token, and 323 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1600 tokens : 77.3125% similarity, with 1237 matched token, and 363 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1650 tokens : 75.63636363636364% similarity, with 1248 matched token, and 402 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1700 tokens : 74.41176470588235% similarity, with 1265 matched token, and 435 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1750 tokens : 72.39999999999999% similarity, with 1267 matched token, and 483 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1800 tokens : 70.22222222222221% similarity, with 1264 matched token, and 536 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1850 tokens : 69.62162162162161% similarity, with 1288 matched token, and 562 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1900 tokens : 68.84210526315789% similarity, with 1308 matched token, and 592 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 1950 tokens : 67.02564102564102% similarity, with 1307 matched token, and 643 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2000 tokens : 66.3% similarity, with 1326 matched token, and 674 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2050 tokens : 64.04878048780488% similarity, with 1313 matched token, and 737 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2100 tokens : 63.66666666666667% similarity, with 1337 matched token, and 763 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2150 tokens : 62.83720930232558% similarity, with 1351 matched token, and 799 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2200 tokens : 61.68181818181818% similarity, with 1357 matched token, and 843 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2250 tokens : 60.84444444444445% similarity, with 1369 matched token, and 881 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2300 tokens : 58.434782608695656% similarity, with 1344 matched token, and 956 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2350 tokens : 57.48936170212766% similarity, with 1351 matched token, and 999 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2400 tokens : 56.458333333333336% similarity, with 1355 matched token, and 1045 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2450 tokens : 55.224489795918366% similarity, with 1353 matched token, and 1097 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2500 tokens : 54.400000000000006% similarity, with 1360 matched token, and 1140 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2550 tokens : 53.333333333333336% similarity, with 1360 matched token, and 1190 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2600 tokens : 51.76923076923077% similarity, with 1346 matched token, and 1254 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2650 tokens : 50.79245283018869% similarity, with 1346 matched token, and 1304 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2700 tokens : 49.81481481481482% similarity, with 1345 matched token, and 1355 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2750 tokens : 49.163636363636364% similarity, with 1352 matched token, and 1398 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2800 tokens : 48.285714285714285% similarity, with 1352 matched token, and 1448 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2850 tokens : 47.40350877192982% similarity, with 1351 matched token, and 1499 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2900 tokens : 46.310344827586206% similarity, with 1343 matched token, and 1557 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 2950 tokens : 45.52542372881356% similarity, with 1343 matched token, and 1607 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3000 tokens : 44.56666666666666% similarity, with 1337 matched token, and 1663 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3050 tokens : 43.77049180327869% similarity, with 1335 matched token, and 1715 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3100 tokens : 42.45161290322581% similarity, with 1316 matched token, and 1784 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3150 tokens : 41.84126984126984% similarity, with 1318 matched token, and 1832 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3200 tokens : 41.1875% similarity, with 1318 matched token, and 1882 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3250 tokens : 40.43076923076923% similarity, with 1314 matched token, and 1936 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3300 tokens : 39.484848484848484% similarity, with 1303 matched token, and 1997 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3350 tokens : 38.776119402985074% similarity, with 1299 matched token, and 2051 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3400 tokens : 37.6764705882353% similarity, with 1281 matched token, and 2119 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3450 tokens : 36.608695652173914% similarity, with 1263 matched token, and 2187 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3500 tokens : 35.97142857142857% similarity, with 1259 matched token, and 2241 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3550 tokens : 35.267605633802816% similarity, with 1252 matched token, and 2298 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3600 tokens : 34.72222222222222% similarity, with 1250 matched token, and 2350 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3650 tokens : 33.91780821917808% similarity, with 1238 matched token, and 2412 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3700 tokens : 33.37837837837838% similarity, with 1235 matched token, and 2465 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3750 tokens : 32.4% similarity, with 1215 matched token, and 2535 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3800 tokens : 31.5% similarity, with 1197 matched token, and 2603 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3850 tokens : 30.80519480519481% similarity, with 1186 matched token, and 2664 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3900 tokens : 29.897435897435898% similarity, with 1166 matched token, and 2734 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 3950 tokens : 29.012658227848103% similarity, with 1146 matched token, and 2804 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4000 tokens : 28.025% similarity, with 1121 matched token, and 2879 token mismatch\r\n", + "###\r\n", + "### Model validation end ###\r\n", + "###\r\n" + ] + } + ], + "source": [ + "!python3 ./memory_script/eval_v5headsize2x_memory_guided.py \"../../../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth\" \"./logs/v5-hs2x-L6-D4096-E0_1-4k.csv\" 1100 4000" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0ea3fdfd", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-25T18:19:42.074848Z", + "iopub.status.busy": "2023-08-25T18:19:42.074090Z", + "iopub.status.idle": "2023-08-25T19:21:09.033195Z", + "shell.execute_reply": "2023-08-25T19:21:09.032367Z" + }, + "papermill": { + "duration": 3686.98662, + "end_time": "2023-08-25T19:21:09.035206", + "exception": false, + "start_time": "2023-08-25T18:19:42.048586", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-08-25 18:19:46,719] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "###\r\n", + "### Model validation start ###\r\n", + "###\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4100 tokens : 26.195121951219512% similarity, with 1074 matched token, and 3026 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4200 tokens : 24.952380952380953% similarity, with 1048 matched token, and 3152 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4300 tokens : 23.13953488372093% similarity, with 995 matched token, and 3305 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4400 tokens : 21.386363636363637% similarity, with 941 matched token, and 3459 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4500 tokens : 19.2% similarity, with 864 matched token, and 3636 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4600 tokens : 16.97826086956522% similarity, with 781 matched token, and 3819 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4700 tokens : 15.319148936170212% similarity, with 720 matched token, and 3980 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4800 tokens : 13.645833333333332% similarity, with 655 matched token, and 4145 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 4900 tokens : 12.306122448979592% similarity, with 603 matched token, and 4297 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5000 tokens : 10.92% similarity, with 546 matched token, and 4454 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5100 tokens : 9.450980392156863% similarity, with 482 matched token, and 4618 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5200 tokens : 7.98076923076923% similarity, with 415 matched token, and 4785 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5300 tokens : 7.283018867924529% similarity, with 386 matched token, and 4914 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5400 tokens : 6.12962962962963% similarity, with 331 matched token, and 5069 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5500 tokens : 5.50909090909091% similarity, with 303 matched token, and 5197 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5600 tokens : 5.017857142857142% similarity, with 281 matched token, and 5319 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5700 tokens : 4.385964912280701% similarity, with 250 matched token, and 5450 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5800 tokens : 4.068965517241379% similarity, with 236 matched token, and 5564 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 5900 tokens : 3.8474576271186436% similarity, with 227 matched token, and 5673 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6000 tokens : 3.716666666666667% similarity, with 223 matched token, and 5777 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6100 tokens : 3.4918032786885247% similarity, with 213 matched token, and 5887 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6200 tokens : 3.3225806451612905% similarity, with 206 matched token, and 5994 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6300 tokens : 3.2222222222222223% similarity, with 203 matched token, and 6097 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6400 tokens : 3.2031249999999996% similarity, with 205 matched token, and 6195 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6500 tokens : 3.1846153846153844% similarity, with 207 matched token, and 6293 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6600 tokens : 3.1363636363636362% similarity, with 207 matched token, and 6393 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6700 tokens : 3.119402985074627% similarity, with 209 matched token, and 6491 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6800 tokens : 3.088235294117647% similarity, with 210 matched token, and 6590 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 6900 tokens : 3.173913043478261% similarity, with 219 matched token, and 6681 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7000 tokens : 3.1285714285714286% similarity, with 219 matched token, and 6781 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7100 tokens : 3.140845070422535% similarity, with 223 matched token, and 6877 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7200 tokens : 3.111111111111111% similarity, with 224 matched token, and 6976 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7300 tokens : 3.1643835616438354% similarity, with 231 matched token, and 7069 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7400 tokens : 3.1621621621621623% similarity, with 234 matched token, and 7166 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7500 tokens : 3.1466666666666665% similarity, with 236 matched token, and 7264 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7600 tokens : 3.1578947368421053% similarity, with 240 matched token, and 7360 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7700 tokens : 3.1558441558441555% similarity, with 243 matched token, and 7457 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7800 tokens : 3.1538461538461537% similarity, with 246 matched token, and 7554 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 7900 tokens : 3.151898734177215% similarity, with 249 matched token, and 7651 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8000 tokens : 3.1375% similarity, with 251 matched token, and 7749 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8100 tokens : 3.185185185185185% similarity, with 258 matched token, and 7842 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8200 tokens : 3.1707317073170733% similarity, with 260 matched token, and 7940 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8300 tokens : 3.156626506024096% similarity, with 262 matched token, and 8038 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8400 tokens : 3.130952380952381% similarity, with 263 matched token, and 8137 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8500 tokens : 3.2% similarity, with 272 matched token, and 8228 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8600 tokens : 3.1976744186046515% similarity, with 275 matched token, and 8325 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8700 tokens : 3.206896551724138% similarity, with 279 matched token, and 8421 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8800 tokens : 3.193181818181818% similarity, with 281 matched token, and 8519 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 8900 tokens : 3.191011235955056% similarity, with 284 matched token, and 8616 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9000 tokens : 3.188888888888889% similarity, with 287 matched token, and 8713 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9100 tokens : 3.208791208791209% similarity, with 292 matched token, and 8808 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9200 tokens : 3.2282608695652173% similarity, with 297 matched token, and 8903 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9300 tokens : 3.21505376344086% similarity, with 299 matched token, and 9001 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9400 tokens : 3.1914893617021276% similarity, with 300 matched token, and 9100 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9500 tokens : 3.2105263157894735% similarity, with 305 matched token, and 9195 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9600 tokens : 3.21875% similarity, with 309 matched token, and 9291 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9700 tokens : 3.216494845360825% similarity, with 312 matched token, and 9388 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9800 tokens : 3.2040816326530615% similarity, with 314 matched token, and 9486 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 9900 tokens : 3.2222222222222223% similarity, with 319 matched token, and 9581 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10000 tokens : 3.25% similarity, with 325 matched token, and 9675 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10100 tokens : 3.2574257425742577% similarity, with 329 matched token, and 9771 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10200 tokens : 3.2549019607843137% similarity, with 332 matched token, and 9868 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10300 tokens : 3.2427184466019416% similarity, with 334 matched token, and 9966 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10400 tokens : 3.240384615384615% similarity, with 337 matched token, and 10063 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10500 tokens : 3.257142857142857% similarity, with 342 matched token, and 10158 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10600 tokens : 3.2547169811320753% similarity, with 345 matched token, and 10255 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10700 tokens : 3.2710280373831773% similarity, with 350 matched token, and 10350 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10800 tokens : 3.2870370370370368% similarity, with 355 matched token, and 10445 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 10900 tokens : 3.2660550458715596% similarity, with 356 matched token, and 10544 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11000 tokens : 3.2818181818181817% similarity, with 361 matched token, and 10639 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11100 tokens : 3.288288288288288% similarity, with 365 matched token, and 10735 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11200 tokens : 3.2857142857142856% similarity, with 368 matched token, and 10832 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11300 tokens : 3.2920353982300887% similarity, with 372 matched token, and 10928 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11400 tokens : 3.324561403508772% similarity, with 379 matched token, and 11021 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11500 tokens : 3.3391304347826085% similarity, with 384 matched token, and 11116 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11600 tokens : 3.3448275862068964% similarity, with 388 matched token, and 11212 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11700 tokens : 3.3247863247863245% similarity, with 389 matched token, and 11311 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11800 tokens : 3.347457627118644% similarity, with 395 matched token, and 11405 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 11900 tokens : 3.3361344537815127% similarity, with 397 matched token, and 11503 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12000 tokens : 3.3416666666666663% similarity, with 401 matched token, and 11599 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12100 tokens : 3.3636363636363638% similarity, with 407 matched token, and 11693 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12200 tokens : 3.344262295081967% similarity, with 408 matched token, and 11792 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12300 tokens : 3.3577235772357725% similarity, with 413 matched token, and 11887 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12400 tokens : 3.362903225806452% similarity, with 417 matched token, and 11983 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12500 tokens : 3.3680000000000003% similarity, with 421 matched token, and 12079 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12600 tokens : 3.3650793650793656% similarity, with 424 matched token, and 12176 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12700 tokens : 3.362204724409449% similarity, with 427 matched token, and 12273 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12800 tokens : 3.359375% similarity, with 430 matched token, and 12370 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 12900 tokens : 3.3643410852713176% similarity, with 434 matched token, and 12466 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13000 tokens : 3.3692307692307697% similarity, with 438 matched token, and 12562 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13100 tokens : 3.381679389312977% similarity, with 443 matched token, and 12657 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13200 tokens : 3.3939393939393945% similarity, with 448 matched token, and 12752 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13300 tokens : 3.37593984962406% similarity, with 449 matched token, and 12851 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13400 tokens : 3.395522388059701% similarity, with 455 matched token, and 12945 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13500 tokens : 3.3851851851851853% similarity, with 457 matched token, and 13043 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13600 tokens : 3.4044117647058822% similarity, with 463 matched token, and 13137 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13700 tokens : 3.386861313868613% similarity, with 464 matched token, and 13236 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13800 tokens : 3.398550724637681% similarity, with 469 matched token, and 13331 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 13900 tokens : 3.3884892086330933% similarity, with 471 matched token, and 13429 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14000 tokens : 3.4357142857142855% similarity, with 481 matched token, and 13519 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14100 tokens : 3.4255319148936167% similarity, with 483 matched token, and 13617 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14200 tokens : 3.408450704225352% similarity, with 484 matched token, and 13716 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14300 tokens : 3.4055944055944054% similarity, with 487 matched token, and 13813 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14400 tokens : 3.4166666666666665% similarity, with 492 matched token, and 13908 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14500 tokens : 3.420689655172414% similarity, with 496 matched token, and 14004 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14600 tokens : 3.404109589041096% similarity, with 497 matched token, and 14103 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14700 tokens : 3.4421768707482996% similarity, with 506 matched token, and 14194 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14800 tokens : 3.4391891891891895% similarity, with 509 matched token, and 14291 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 14900 tokens : 3.422818791946309% similarity, with 510 matched token, and 14390 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15000 tokens : 3.42% similarity, with 513 matched token, and 14487 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15100 tokens : 3.4172185430463573% similarity, with 516 matched token, and 14584 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15200 tokens : 3.4078947368421058% similarity, with 518 matched token, and 14682 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15300 tokens : 3.4117647058823533% similarity, with 522 matched token, and 14778 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15400 tokens : 3.4090909090909087% similarity, with 525 matched token, and 14875 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15500 tokens : 3.4000000000000004% similarity, with 527 matched token, and 14973 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15600 tokens : 3.4038461538461537% similarity, with 531 matched token, and 15069 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15700 tokens : 3.4012738853503186% similarity, with 534 matched token, and 15166 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15800 tokens : 3.3924050632911396% similarity, with 536 matched token, and 15264 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 15900 tokens : 3.3962264150943398% similarity, with 540 matched token, and 15360 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "## Model validation for 16000 tokens : 3.3875% similarity, with 542 matched token, and 15458 token mismatch\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "###\r\n", + "### Model validation end ###\r\n", + "###\r\n" + ] + } + ], + "source": [ + "!python3 ./memory_script/eval_v5headsize2x_memory_guided.py \"../../../model/v5-hs2x-L6-D4096-E0_1-mem-ctx-8k.pth\" \"./logs/v5-hs2x-L6-D4096-E0_1-16k.csv\" 4100 16000" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rwkv-infctx", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 4086.862551, + "end_time": "2023-08-25T19:21:09.390101", + "environment_variables": {}, + "exception": null, + "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/memory-bench/Benchmark-V5headsize2x.ipynb", + "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/memory-bench/Benchmark-V5headsize2x.ipynb", + "parameters": {}, + "start_time": "2023-08-25T18:13:02.527550", + "version": "2.4.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file