File size: 36,931 Bytes
489dc47 e9cc5c3 489dc47 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 fe490e0 e9cc5c3 489dc47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "6cb144df-3103-4fbe-96cf-5bc7e7e119c6",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting vllm\n",
" Downloading vllm-0.3.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)\n",
"Collecting ninja (from vllm)\n",
" Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.11/site-packages (from vllm) (5.9.8)\n",
"Collecting ray>=2.9 (from vllm)\n",
" Downloading ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl.metadata (13 kB)\n",
"Collecting sentencepiece (from vllm)\n",
" Downloading sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.11/site-packages (from vllm) (1.26.4)\n",
"Collecting torch==2.1.2 (from vllm)\n",
" Downloading torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)\n",
"Requirement already satisfied: transformers>=4.37.0 in /usr/local/lib/python3.11/site-packages (from vllm) (4.37.2)\n",
"Collecting xformers==0.0.23.post1 (from vllm)\n",
" Downloading xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.0 kB)\n",
"Requirement already satisfied: fastapi in /usr/local/lib/python3.11/site-packages (from vllm) (0.88.0)\n",
"Collecting uvicorn[standard] (from vllm)\n",
" Downloading uvicorn-0.27.1-py3-none-any.whl.metadata (6.3 kB)\n",
"Collecting pydantic>=2.0 (from vllm)\n",
" Downloading pydantic-2.6.1-py3-none-any.whl.metadata (83 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m83.5/83.5 kB\u001b[0m \u001b[31m272.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting aioprometheus[starlette] (from vllm)\n",
" Downloading aioprometheus-23.12.0-py3-none-any.whl.metadata (9.8 kB)\n",
"Collecting pynvml==11.5.0 (from vllm)\n",
" Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m425.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.13.1)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (4.9.0)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (1.12)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.2.1)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.1.3)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (2023.10.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (8.9.2.26)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.3.1)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (11.0.2.54)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (10.3.2.106)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (11.4.5.107)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.0.106)\n",
"Collecting nvidia-nccl-cu12==2.18.1 (from torch==2.1.2->vllm)\n",
" Downloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl.metadata (1.8 kB)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
"Collecting triton==2.1.0 (from torch==2.1.2->vllm)\n",
" Downloading triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.1.2->vllm) (12.3.101)\n",
"Collecting annotated-types>=0.4.0 (from pydantic>=2.0->vllm)\n",
" Downloading annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)\n",
"Collecting pydantic-core==2.16.2 (from pydantic>=2.0->vllm)\n",
" Downloading pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (8.1.7)\n",
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (4.21.1)\n",
"Collecting msgpack<2.0.0,>=1.0.0 (from ray>=2.9->vllm)\n",
" Downloading msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (23.2)\n",
"Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (4.25.2)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (6.0.1)\n",
"Requirement already satisfied: aiosignal in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (1.3.1)\n",
"Requirement already satisfied: frozenlist in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (1.4.1)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (2.31.0)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.20.3)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (2023.12.25)\n",
"Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.15.2)\n",
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.4.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (4.66.2)\n",
"Collecting orjson (from aioprometheus[starlette]->vllm)\n",
" Downloading orjson-3.9.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m107.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting quantile-python>=1.1 (from aioprometheus[starlette]->vllm)\n",
" Downloading quantile-python-1.1.tar.gz (2.9 kB)\n",
" Installing build dependencies ... \u001b[?25ldone\n",
"\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
"\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n",
"\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25hRequirement already satisfied: starlette>=0.14.2 in /usr/local/lib/python3.11/site-packages (from aioprometheus[starlette]->vllm) (0.22.0)\n",
"INFO: pip is looking at multiple versions of fastapi to determine which version is compatible with other requirements. This could take a while.\n",
"Collecting fastapi (from vllm)\n",
" Downloading fastapi-0.109.2-py3-none-any.whl.metadata (25 kB)\n",
"Collecting starlette>=0.14.2 (from aioprometheus[starlette]->vllm)\n",
" Downloading starlette-0.36.3-py3-none-any.whl.metadata (5.9 kB)\n",
"Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.11/site-packages (from uvicorn[standard]->vllm) (0.14.0)\n",
"Collecting httptools>=0.5.0 (from uvicorn[standard]->vllm)\n",
" Downloading httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
"Collecting python-dotenv>=0.13 (from uvicorn[standard]->vllm)\n",
" Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n",
"Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]->vllm)\n",
" Downloading uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
"Collecting watchfiles>=0.13 (from uvicorn[standard]->vllm)\n",
" Downloading watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
"Collecting websockets>=10.4 (from uvicorn[standard]->vllm)\n",
" Downloading websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
"Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.11/site-packages (from starlette>=0.14.2->aioprometheus[starlette]->vllm) (4.2.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/site-packages (from jinja2->torch==2.1.2->vllm) (2.1.5)\n",
"Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (23.2.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (2023.12.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (0.33.0)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (0.17.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2.1.1)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2.2.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2024.2.2)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.11/site-packages (from sympy->torch==2.1.2->vllm) (1.3.0)\n",
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/site-packages (from anyio<5,>=3.4.0->starlette>=0.14.2->aioprometheus[starlette]->vllm) (1.3.0)\n",
"Downloading vllm-0.3.0-cp311-cp311-manylinux1_x86_64.whl (38.1 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m38.1/38.1 MB\u001b[0m \u001b[31m152.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl (670.2 MB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m177.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hDownloading xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl (213.0 MB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m213.0/213.0 MB\u001b[0m \u001b[31m236.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hDownloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl (209.8 MB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m209.8/209.8 MB\u001b[0m \u001b[31m278.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hDownloading triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m89.2/89.2 MB\u001b[0m \u001b[31m278.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
"\u001b[?25hDownloading pydantic-2.6.1-py3-none-any.whl (394 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m394.8/394.8 kB\u001b[0m \u001b[31m578.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m604.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl (65.4 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m65.4/65.4 MB\u001b[0m \u001b[31m199.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading fastapi-0.109.2-py3-none-any.whl (92 kB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m92.1/92.1 kB\u001b[0m \u001b[31m384.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m571.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m625.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading annotated_types-0.6.0-py3-none-any.whl (12 kB)\n",
"Downloading httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m318.5/318.5 kB\u001b[0m \u001b[31m610.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (557 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m558.0/558.0 kB\u001b[0m \u001b[31m604.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
"Downloading starlette-0.36.3-py3-none-any.whl (71 kB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m404.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.5 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m98.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m621.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m130.9/130.9 kB\u001b[0m \u001b[31m492.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading aioprometheus-23.12.0-py3-none-any.whl (31 kB)\n",
"Downloading orjson-3.9.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n",
"\u001b[2K \u001b[90mβββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m139.0/139.0 kB\u001b[0m \u001b[31m505.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading uvicorn-0.27.1-py3-none-any.whl (60 kB)\n",
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m506.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hBuilding wheels for collected packages: quantile-python\n",
" Building wheel for quantile-python (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for quantile-python: filename=quantile_python-1.1-py3-none-any.whl size=3444 sha256=f2b9e6c120ce03904d5a6fcbccc70a62a4a05bb1352a8f87d02ddbafdf252601\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-jv2xdihe/wheels/67/a2/17/29e7169adf03a7e44b922abb6a42c2c1b0fda11f7bfbdb24a2\n",
"Successfully built quantile-python\n",
"Installing collected packages: sentencepiece, quantile-python, ninja, websockets, uvloop, uvicorn, triton, python-dotenv, pynvml, pydantic-core, orjson, nvidia-nccl-cu12, msgpack, httptools, annotated-types, watchfiles, starlette, pydantic, aioprometheus, torch, fastapi, xformers, ray, vllm\n",
" Attempting uninstall: triton\n",
" Found existing installation: triton 2.2.0\n",
" Uninstalling triton-2.2.0:\n",
" Successfully uninstalled triton-2.2.0\n",
" Attempting uninstall: nvidia-nccl-cu12\n",
" Found existing installation: nvidia-nccl-cu12 2.19.3\n",
" Uninstalling nvidia-nccl-cu12-2.19.3:\n",
" Successfully uninstalled nvidia-nccl-cu12-2.19.3\n",
" Attempting uninstall: starlette\n",
" Found existing installation: starlette 0.22.0\n",
" Uninstalling starlette-0.22.0:\n",
" Successfully uninstalled starlette-0.22.0\n",
" Attempting uninstall: pydantic\n",
" Found existing installation: pydantic 1.10.14\n",
" Uninstalling pydantic-1.10.14:\n",
" Successfully uninstalled pydantic-1.10.14\n",
" Attempting uninstall: torch\n",
" Found existing installation: torch 2.2.0\n",
" Uninstalling torch-2.2.0:\n",
" Successfully uninstalled torch-2.2.0\n",
" Attempting uninstall: fastapi\n",
" Found existing installation: fastapi 0.88.0\n",
" Uninstalling fastapi-0.88.0:\n",
" Successfully uninstalled fastapi-0.88.0\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"modal 0.57.47 requires synchronicity~=0.6.2, which is not installed.\n",
"modal 0.57.47 requires grpclib==0.4.7, but you have grpclib 0.4.3 which is incompatible.\n",
"modal 0.57.47 requires typer~=0.9.0, but you have typer 0.6.1 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed aioprometheus-23.12.0 annotated-types-0.6.0 fastapi-0.109.2 httptools-0.6.1 msgpack-1.0.7 ninja-1.11.1.1 nvidia-nccl-cu12-2.18.1 orjson-3.9.14 pydantic-2.6.1 pydantic-core-2.16.2 pynvml-11.5.0 python-dotenv-1.0.1 quantile-python-1.1 ray-2.9.2 sentencepiece-0.1.99 starlette-0.36.3 torch-2.1.2 triton-2.1.0 uvicorn-0.27.1 uvloop-0.19.0 vllm-0.3.0 watchfiles-0.21.0 websockets-12.0 xformers-0.0.23.post1\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install vllm "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b99c2bf2-4ed5-4514-a03c-4ca33a9e1060",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /usr/local/lib/python3.11/site-packages (2.2.0)\n",
"Requirement already satisfied: numpy<2,>=1.23.2 in /usr/local/lib/python3.11/site-packages (from pandas) (1.26.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n"
]
}
],
"source": [
"!pip install pandas"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "75a61ec8-e440-42b0-8b4d-e3cb05841b71",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import numpy as np\n",
"import pandas as pd\n",
"import json\n",
"import os\n",
"from vllm import LLM, SamplingParams"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7d29ba48-d4b0-4f24-8f88-560d9bed100c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO 02-15 18:03:33 llm_engine.py:72] Initializing an LLM engine with config: model='aissatoubalde/lab', tokenizer='microsoft/phi-2', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, seed=0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO 02-15 18:03:33 weight_utils.py:164] Using model weights format ['*.safetensors']\n"
]
},
{
"ename": "KeyError",
"evalue": "'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m base_model_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmicrosoft/phi-2\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 3\u001b[0m merged_peft_model_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maissatoubalde/lab\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m llm \u001b[38;5;241m=\u001b[39m \u001b[43mLLM\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerged_peft_model_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_model_name\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/entrypoints/llm.py:109\u001b[0m, in \u001b[0;36mLLM.__init__\u001b[0;34m(self, model, tokenizer, tokenizer_mode, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, enforce_eager, max_context_len_to_capture, disable_custom_all_reduce, **kwargs)\u001b[0m\n\u001b[1;32m 90\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable_log_stats\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 91\u001b[0m engine_args \u001b[38;5;241m=\u001b[39m EngineArgs(\n\u001b[1;32m 92\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 93\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 108\u001b[0m )\n\u001b[0;32m--> 109\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine \u001b[38;5;241m=\u001b[39m \u001b[43mLLMEngine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_engine_args\u001b[49m\u001b[43m(\u001b[49m\u001b[43mengine_args\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter \u001b[38;5;241m=\u001b[39m Counter()\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:356\u001b[0m, in \u001b[0;36mLLMEngine.from_engine_args\u001b[0;34m(cls, engine_args)\u001b[0m\n\u001b[1;32m 354\u001b[0m placement_group \u001b[38;5;241m=\u001b[39m initialize_cluster(parallel_config)\n\u001b[1;32m 355\u001b[0m \u001b[38;5;66;03m# Create the LLM engine.\u001b[39;00m\n\u001b[0;32m--> 356\u001b[0m engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mengine_configs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 357\u001b[0m \u001b[43m \u001b[49m\u001b[43mplacement_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 358\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_stats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mengine_args\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdisable_log_stats\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m engine\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:111\u001b[0m, in \u001b[0;36mLLMEngine.__init__\u001b[0;34m(self, model_config, cache_config, parallel_config, scheduler_config, lora_config, placement_group, log_stats)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_workers_ray(placement_group)\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 111\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_init_workers\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;66;03m# Profile the memory usage and initialize the cache.\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_cache()\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:152\u001b[0m, in \u001b[0;36mLLMEngine._init_workers\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdriver_worker \u001b[38;5;241m=\u001b[39m Worker(\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config,\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparallel_config,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 149\u001b[0m is_driver_worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 150\u001b[0m )\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_workers(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minit_model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 152\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_workers\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mload_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:983\u001b[0m, in \u001b[0;36mLLMEngine._run_workers\u001b[0;34m(self, method, driver_args, driver_kwargs, max_concurrent_workers, *args, **kwargs)\u001b[0m\n\u001b[1;32m 980\u001b[0m driver_kwargs \u001b[38;5;241m=\u001b[39m kwargs\n\u001b[1;32m 982\u001b[0m \u001b[38;5;66;03m# Start the driver worker after all the ray workers.\u001b[39;00m\n\u001b[0;32m--> 983\u001b[0m driver_worker_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 984\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdriver_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdriver_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 986\u001b[0m \u001b[38;5;66;03m# Get the results of the ray workers.\u001b[39;00m\n\u001b[1;32m 987\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworkers:\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/worker/worker.py:92\u001b[0m, in \u001b[0;36mWorker.load_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 92\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/worker/model_runner.py:75\u001b[0m, in \u001b[0;36mModelRunner.load_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 75\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mget_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_config\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlora_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 77\u001b[0m vocab_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mvocab_size\n\u001b[1;32m 79\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlora_config:\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/model_executor/model_loader.py:88\u001b[0m, in \u001b[0;36mget_model\u001b[0;34m(model_config, lora_config)\u001b[0m\n\u001b[1;32m 85\u001b[0m initialize_dummy_weights(model)\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 87\u001b[0m \u001b[38;5;66;03m# Load the weights from the cached or downloaded files.\u001b[39;00m\n\u001b[0;32m---> 88\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_weights\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 89\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_format\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\u001b[38;5;241m.\u001b[39meval()\n",
"File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/model_executor/models/phi.py:302\u001b[0m, in \u001b[0;36mPhiForCausalLM.load_weights\u001b[0;34m(self, model_name_or_path, cache_dir, load_format, revision)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;66;03m# pylint: disable=E1136\u001b[39;00m\n\u001b[0;32m--> 302\u001b[0m param \u001b[38;5;241m=\u001b[39m \u001b[43mparams_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 303\u001b[0m weight_loader \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(param, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mweight_loader\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 304\u001b[0m default_weight_loader)\n\u001b[1;32m 305\u001b[0m weight_loader(param, loaded_weight)\n",
"\u001b[0;31mKeyError\u001b[0m: 'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'"
]
}
],
"source": [
"os.environ['CUDA_VISIBLE_DEVICES']=\"0\"\n",
"base_model_name='microsoft/phi-2'\n",
"merged_peft_model_name=\"aissatoubalde/lab\"\n",
"llm = LLM(model=merged_peft_model_name, tokenizer=base_model_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f416b0ae-16bf-446a-86b1-97337a610c53",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|