{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Run all the LM-Evals\n", "Batch by batch : In accordence to the first letter" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MODEL_REPO: RWKV/rwkv-5-world-7b\n" ] } ], "source": [ "# Configure the batch size to use, for inference, default=8 is adjusted for 4090's (24GB vram)\n", "MODEL_REPO=\"RWKV/rwkv-5-world-7b\"\n", "OUTPUT_PREFIX=\"RB/Eagle-7B-1T\"\n", "print(\"MODEL_REPO: \", MODEL_REPO)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The following values were not passed to `accelerate launch` and had defaults used instead:\n", "\t`--num_processes` was set to a value of `8`\n", "\t\tMore than one GPU was found, enabling multi-GPU training.\n", "\t\tIf this was unintended please pass in `--num_processes=1`.\n", "\t`--num_machines` was set to a value of `1`\n", "\t`--mixed_precision` was set to a value of `'no'`\n", "\t`--dynamo_backend` was set to a value of `'no'`\n", "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n", "Using RTX 3090 or 4000 series which doesn't support faster communication speedups. 
Ensuring P2P and IB communications are disabled.\n", "2024-02-01:00:29:37,579 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,579 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,627 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,627 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,641 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,641 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,710 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,717 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,717 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,717 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,717 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,723 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,723 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not 
set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,733 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,733 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,743 INFO [utils.py:145] Note: detected 160 virtual cores but NumExpr set to maximum of 64, check \"NUMEXPR_MAX_THREADS\" environment variable.\n", "2024-02-01:00:29:37,743 INFO [utils.py:148] Note: NumExpr detected 160 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", "2024-02-01:00:29:37,749 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,763 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,864 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,866 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,873 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,874 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:37,881 INFO [config.py:58] PyTorch version 2.2.0 available.\n", "2024-02-01:00:29:39,615 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,620 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,625 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,862 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,864 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,870 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,907 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:39,991 INFO [__main__.py:156] Verbosity set to INFO\n", "2024-02-01:00:29:42,187 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. 
Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,199 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,211 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,381 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,398 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,460 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,504 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:42,518 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,334 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,336 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,340 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,341 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,360 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. 
Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,361 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,488 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,489 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,520 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,522 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,609 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,610 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,627 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,630 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:44,647 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. 
Run with `--verbosity DEBUG` for full details.\n", "2024-02-01:00:29:44,648 INFO [__main__.py:229] Selected Tasks: ['arc_challenge']\n", "2024-02-01:00:29:45,903 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:45,931 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:45,959 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:45,967 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:45,988 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:45,998 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,011 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,016 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,027 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,039 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,041 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,044 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,045 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,056 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,067 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,069 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,072 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,081 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,083 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,087 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,098 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,099 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,104 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,110 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,114 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,118 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,128 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,129 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,133 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,138 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,142 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,149 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,157 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,159 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,163 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,167 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,175 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,180 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,186 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,187 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,188 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,192 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,196 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,203 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,211 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,215 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,216 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,216 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,217 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,220 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,224 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,235 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,241 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,244 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,245 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,245 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,246 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,250 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,253 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,265 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,273 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,275 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,277 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,278 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,279 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,280 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,286 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,295 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,303 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,306 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,307 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,308 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,309 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,309 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,315 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,326 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,332 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,335 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,336 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,338 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,338 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,338 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,345 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,355 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,361 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,365 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,365 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,367 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,367 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,369 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,374 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,387 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,392 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,393 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,395 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,395 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,396 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,397 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,402 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,418 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,421 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,422 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,426 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,426 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,427 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,427 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,434 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,448 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,451 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,451 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,455 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,457 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,457 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,458 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,463 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,479 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,481 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,481 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,487 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,487 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,493 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,499 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,499 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,507 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,511 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,511 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,517 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,517 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,521 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,528 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,529 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,539 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,539 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,540 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,545 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,546 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,550 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,558 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,561 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,568 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,569 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,572 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,574 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,586 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,588 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,590 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,591 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,599 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,599 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,602 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,604 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,615 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,619 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,622 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,623 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,629 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,633 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,634 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,635 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,645 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,649 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,652 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,654 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,658 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,664 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,667 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,670 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,674 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,677 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,680 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,687 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,688 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,694 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,699 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,701 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,705 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,710 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,710 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,720 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,722 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,723 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,729 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,734 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,735 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,740 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,740 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,750 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,751 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,752 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,760 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,764 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,766 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,770 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,773 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,781 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,782 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,783 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,790 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,793 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,796 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,798 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,802 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,811 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,811 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,815 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,820 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,823 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,826 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,827 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,831 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,840 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,840 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,845 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,850 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,852 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,855 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,861 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,870 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,870 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,875 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,879 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,881 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,884 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,890 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,898 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,899 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,903 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,909 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,911 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,912 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,921 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,928 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,929 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,933 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,939 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,941 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,942 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:46,951 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,957 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,958 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,969 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,971 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,980 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,985 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,997 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:46,999 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,014 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,026 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,029 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,055 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,059 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,083 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,087 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,110 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 
attention.\n", "2024-02-01:00:29:47,115 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:29:47,142 INFO [modeling_rwkv5.py:268] Could not load the custom CUDA kernel for RWKV5 attention.\n", "2024-02-01:00:30:06,252 INFO [huggingface.py:302] Using 8 devices with data parallelism\n", "2024-02-01:00:30:06,522 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:06,522 INFO [task.py:363] Building contexts for task on rank 4...\n", "2024-02-01:00:30:07,629 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:07,629 INFO [task.py:363] Building contexts for task on rank 1...\n", "2024-02-01:00:30:07,778 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:07,778 INFO [task.py:363] Building contexts for task on rank 5...\n", "2024-02-01:00:30:08,137 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:08,137 INFO [task.py:363] Building contexts for task on rank 3...\n", "2024-02-01:00:30:08,970 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:08,970 INFO [task.py:363] Building contexts for task on rank 7...\n", "2024-02-01:00:30:09,169 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:09,169 INFO [task.py:363] Building contexts for task on rank 6...\n", "2024-02-01:00:30:09,193 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:09,193 INFO [task.py:363] Building contexts for task on rank 2...\n", "2024-02-01:00:30:09,341 WARNING [evaluator.py:143] Overwriting default num_fewshot of arc_challenge from None to 25\n", "2024-02-01:00:30:09,341 INFO [task.py:363] Building contexts for task on 
rank 0...\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "2024-02-01:00:30:17,987 INFO [evaluator.py:321] Running loglikelihood requests\n", "Passed argument batch_size = auto:1. Detecting largest batch size\n", "Passed argument batch_size = auto:1. Detecting largest batch size\n", "Passed argument batch_size = auto:1. Detecting largest batch size\n", " 0%| | 0/587 [00:00 > >) + 0x1e6 (0x7faba5bf16e6 in /home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/torch/lib/libtorch_cuda.so)\n", "frame #2: c10d::ProcessGroupNCCL::workCleanupLoop() + 0x19d (0x7faba5bf4c3d in /home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/torch/lib/libtorch_cuda.so)\n", "frame #3: c10d::ProcessGroupNCCL::ncclCommWatchdog() + 0x119 (0x7faba5bf5839 in /home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/torch/lib/libtorch_cuda.so)\n", "frame #4: + 0xdbbf4 (0x7fabef8c7bf4 in /home/recursal/miniconda3/envs/lm-eval/bin/../lib/libstdc++.so.6)\n", "frame #5: + 0x94ac3 (0x7fabf0fefac3 in /lib/x86_64-linux-gnu/libc.so.6)\n", "frame #6: + 0x126850 (0x7fabf1081850 in /lib/x86_64-linux-gnu/libc.so.6)\n", "\n", "Both `max_new_tokens` (=4096) and `max_length`(=960) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. 
(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n", "[2024-02-01 09:06:33,732] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452532 closing signal SIGTERM\n", "[2024-02-01 09:06:33,732] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452533 closing signal SIGTERM\n", "[2024-02-01 09:06:33,733] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452534 closing signal SIGTERM\n", "[2024-02-01 09:06:33,733] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452535 closing signal SIGTERM\n", "[2024-02-01 09:06:33,734] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452536 closing signal SIGTERM\n", "[2024-02-01 09:06:33,735] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452537 closing signal SIGTERM\n", "[2024-02-01 09:06:33,736] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 1452538 closing signal SIGTERM\n", "[2024-02-01 09:06:34,580] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: -6) local_rank: 7 (pid: 1452539) of binary: /home/recursal/miniconda3/envs/lm-eval/bin/python\n", "Traceback (most recent call last):\n", " File \"/home/recursal/miniconda3/envs/lm-eval/bin/accelerate\", line 8, in \n", " sys.exit(main())\n", " ^^^^^^\n", " File \"/home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/accelerate/commands/accelerate_cli.py\", line 47, in main\n", " args.func(args)\n", " File \"/home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/accelerate/commands/launch.py\", line 1014, in launch_command\n", " multi_gpu_launcher(args)\n", " File \"/home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/accelerate/commands/launch.py\", line 672, in multi_gpu_launcher\n", " distrib_run.run(args)\n", " File \"/home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/torch/distributed/run.py\", 
line 803, in run\n", " elastic_launch(\n", " File \"/home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/torch/distributed/launcher/api.py\", line 135, in __call__\n", " return launch_agent(self._config, self._entrypoint, list(args))\n", " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", " File \"/home/recursal/miniconda3/envs/lm-eval/lib/python3.12/site-packages/torch/distributed/launcher/api.py\", line 268, in launch_agent\n", " raise ChildFailedError(\n", "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "========================================================\n", "lm_eval FAILED\n", "--------------------------------------------------------\n", "Failures:\n", " \n", "--------------------------------------------------------\n", "Root Cause (first observed failure):\n", "[0]:\n", " time : 2024-02-01_09:06:33\n", " host : localhost\n", " rank : 7 (local_rank: 7)\n", " exitcode : -6 (pid: 1452539)\n", " error_file: \n", " traceback : Signal 6 (SIGABRT) received by PID 1452539\n", "========================================================\n" ] } ], "source": [ "# Run the gsm8k eval (5-shot, per-sample logs) for MODEL_REPO via accelerate multi-GPU launch.\n", "# {MODEL_REPO} / {OUTPUT_PREFIX} are IPython-interpolated before the shell sees the line,\n", "# so both arguments are quoted to prevent word-splitting/globbing if a value ever contains\n", "# spaces or special characters. dtype is written unquoted inside the quoted --model_args\n", "# string so lm_eval still receives dtype=float16, same as the shell's quote removal produced before.\n", "!accelerate launch -m lm_eval --model hf --model_args \"pretrained={MODEL_REPO},dtype=float16,trust_remote_code=True\" \\\n", " --tasks gsm8k --num_fewshot 5 --batch_size 1 --log_samples \\\n", " --output_path \"./results/{OUTPUT_PREFIX}-gsm8k\"" ] } ], "metadata": { "kernelspec": { "display_name": "lm-eval", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 2 }