{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "6cb144df-3103-4fbe-96cf-5bc7e7e119c6", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting vllm\n", " Downloading vllm-0.3.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)\n", "Collecting ninja (from vllm)\n", " Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.11/site-packages (from vllm) (5.9.8)\n", "Collecting ray>=2.9 (from vllm)\n", " Downloading ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl.metadata (13 kB)\n", "Collecting sentencepiece (from vllm)\n", " Downloading sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/site-packages (from vllm) (1.26.4)\n", "Collecting torch==2.1.2 (from vllm)\n", " Downloading torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)\n", "Requirement already satisfied: transformers>=4.37.0 in /usr/local/lib/python3.11/site-packages (from vllm) (4.37.2)\n", "Collecting xformers==0.0.23.post1 (from vllm)\n", " Downloading xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.0 kB)\n", "Requirement already satisfied: fastapi in /usr/local/lib/python3.11/site-packages (from vllm) (0.88.0)\n", "Collecting uvicorn[standard] (from vllm)\n", " Downloading uvicorn-0.27.1-py3-none-any.whl.metadata (6.3 kB)\n", "Collecting pydantic>=2.0 (from vllm)\n", " Downloading pydantic-2.6.1-py3-none-any.whl.metadata (83 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.5/83.5 kB\u001b[0m \u001b[31m272.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting aioprometheus[starlette] (from vllm)\n", " Downloading aioprometheus-23.12.0-py3-none-any.whl.metadata (9.8 kB)\n", "Collecting pynvml==11.5.0 (from vllm)\n", " Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m425.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.13.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (4.9.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.1.3)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (2023.10.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n", "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.11/site-packages (from 
torch==2.1.2->vllm) (8.9.2.26)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.3.1)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (11.0.2.54)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (10.3.2.106)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (11.4.5.107)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.0.106)\n", "Collecting nvidia-nccl-cu12==2.18.1 (from torch==2.1.2->vllm)\n", " Downloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl.metadata (1.8 kB)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n", "Collecting triton==2.1.0 (from torch==2.1.2->vllm)\n", " Downloading triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.1.2->vllm) (12.3.101)\n", "Collecting annotated-types>=0.4.0 (from pydantic>=2.0->vllm)\n", " Downloading annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)\n", "Collecting pydantic-core==2.16.2 (from pydantic>=2.0->vllm)\n", " Downloading pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n", "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (8.1.7)\n", "Requirement already satisfied: jsonschema in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (4.21.1)\n", "Collecting msgpack<2.0.0,>=1.0.0 (from ray>=2.9->vllm)\n", " Downloading msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (23.2)\n", "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (4.25.2)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (6.0.1)\n", "Requirement already satisfied: aiosignal in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (1.3.1)\n", "Requirement already satisfied: frozenlist in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (1.4.1)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (2.31.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.20.3)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (2023.12.25)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.15.2)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.4.2)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/site-packages (from 
transformers>=4.37.0->vllm) (4.66.2)\n", "Collecting orjson (from aioprometheus[starlette]->vllm)\n", " Downloading orjson-3.9.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m107.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting quantile-python>=1.1 (from aioprometheus[starlette]->vllm)\n", " Downloading quantile-python-1.1.tar.gz (2.9 kB)\n", " Installing build dependencies ... \u001b[?25ldone\n", "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", "\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\n", "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", "\u001b[?25hRequirement already satisfied: starlette>=0.14.2 in /usr/local/lib/python3.11/site-packages (from aioprometheus[starlette]->vllm) (0.22.0)\n", "INFO: pip is looking at multiple versions of fastapi to determine which version is compatible with other requirements. This could take a while.\n", "Collecting fastapi (from vllm)\n", " Downloading fastapi-0.109.2-py3-none-any.whl.metadata (25 kB)\n", "Collecting starlette>=0.14.2 (from aioprometheus[starlette]->vllm)\n", " Downloading starlette-0.36.3-py3-none-any.whl.metadata (5.9 kB)\n", "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.11/site-packages (from uvicorn[standard]->vllm) (0.14.0)\n", "Collecting httptools>=0.5.0 (from uvicorn[standard]->vllm)\n", " Downloading httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", "Collecting python-dotenv>=0.13 (from uvicorn[standard]->vllm)\n", " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]->vllm)\n", " Downloading uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", "Collecting watchfiles>=0.13 (from uvicorn[standard]->vllm)\n", " Downloading watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", "Collecting websockets>=10.4 (from uvicorn[standard]->vllm)\n", " Downloading websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.11/site-packages (from starlette>=0.14.2->aioprometheus[starlette]->vllm) (4.2.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/site-packages (from jinja2->torch==2.1.2->vllm) (2.1.5)\n", "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (23.2.0)\n", "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (2023.12.1)\n", "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (0.33.0)\n", "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (0.17.1)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2.1.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (3.6)\n", 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2.2.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2024.2.2)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.11/site-packages (from sympy->torch==2.1.2->vllm) (1.3.0)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/site-packages (from anyio<5,>=3.4.0->starlette>=0.14.2->aioprometheus[starlette]->vllm) (1.3.0)\n", "Downloading vllm-0.3.0-cp311-cp311-manylinux1_x86_64.whl (38.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.1/38.1 MB\u001b[0m \u001b[31m152.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl (670.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m177.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl (213.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 MB\u001b[0m \u001b[31m236.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl (209.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.8/209.8 MB\u001b[0m \u001b[31m278.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.2/89.2 MB\u001b[0m \u001b[31m278.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading pydantic-2.6.1-py3-none-any.whl (394 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m394.8/394.8 kB\u001b[0m \u001b[31m578.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m604.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl (65.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.4/65.4 MB\u001b[0m \u001b[31m199.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading fastapi-0.109.2-py3-none-any.whl (92 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.1/92.1 kB\u001b[0m \u001b[31m384.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m571.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 
MB\u001b[0m \u001b[31m625.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading annotated_types-0.6.0-py3-none-any.whl (12 kB)\n", "Downloading httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.5/318.5 kB\u001b[0m \u001b[31m610.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (557 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m558.0/558.0 kB\u001b[0m \u001b[31m604.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", "Downloading starlette-0.36.3-py3-none-any.whl (71 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m404.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m98.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m621.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.9/130.9 kB\u001b[0m \u001b[31m492.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading aioprometheus-23.12.0-py3-none-any.whl (31 kB)\n", "Downloading orjson-3.9.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.0/139.0 kB\u001b[0m \u001b[31m505.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading uvicorn-0.27.1-py3-none-any.whl (60 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m506.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hBuilding wheels for collected packages: quantile-python\n", " Building wheel for quantile-python (pyproject.toml) ... 
\u001b[?25ldone\n", "\u001b[?25h Created wheel for quantile-python: filename=quantile_python-1.1-py3-none-any.whl size=3444 sha256=f2b9e6c120ce03904d5a6fcbccc70a62a4a05bb1352a8f87d02ddbafdf252601\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-jv2xdihe/wheels/67/a2/17/29e7169adf03a7e44b922abb6a42c2c1b0fda11f7bfbdb24a2\n", "Successfully built quantile-python\n", "Installing collected packages: sentencepiece, quantile-python, ninja, websockets, uvloop, uvicorn, triton, python-dotenv, pynvml, pydantic-core, orjson, nvidia-nccl-cu12, msgpack, httptools, annotated-types, watchfiles, starlette, pydantic, aioprometheus, torch, fastapi, xformers, ray, vllm\n", " Attempting uninstall: triton\n", " Found existing installation: triton 2.2.0\n", " Uninstalling triton-2.2.0:\n", " Successfully uninstalled triton-2.2.0\n", " Attempting uninstall: nvidia-nccl-cu12\n", " Found existing installation: nvidia-nccl-cu12 2.19.3\n", " Uninstalling nvidia-nccl-cu12-2.19.3:\n", " Successfully uninstalled nvidia-nccl-cu12-2.19.3\n", " Attempting uninstall: starlette\n", " Found existing installation: starlette 0.22.0\n", " Uninstalling starlette-0.22.0:\n", " Successfully uninstalled starlette-0.22.0\n", " Attempting uninstall: pydantic\n", " Found existing installation: pydantic 1.10.14\n", " Uninstalling pydantic-1.10.14:\n", " Successfully uninstalled pydantic-1.10.14\n", " Attempting uninstall: torch\n", " Found existing installation: torch 2.2.0\n", " Uninstalling torch-2.2.0:\n", " Successfully uninstalled torch-2.2.0\n", " Attempting uninstall: fastapi\n", " Found existing installation: fastapi 0.88.0\n", " Uninstalling fastapi-0.88.0:\n", " Successfully uninstalled fastapi-0.88.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", "modal 0.57.47 requires synchronicity~=0.6.2, which is not installed.\n", "modal 0.57.47 requires grpclib==0.4.7, but you have grpclib 0.4.3 which is incompatible.\n", "modal 0.57.47 requires typer~=0.9.0, but you have typer 0.6.1 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed aioprometheus-23.12.0 annotated-types-0.6.0 fastapi-0.109.2 httptools-0.6.1 msgpack-1.0.7 ninja-1.11.1.1 nvidia-nccl-cu12-2.18.1 orjson-3.9.14 pydantic-2.6.1 pydantic-core-2.16.2 pynvml-11.5.0 python-dotenv-1.0.1 quantile-python-1.1 ray-2.9.2 sentencepiece-0.1.99 starlette-0.36.3 torch-2.1.2 triton-2.1.0 uvicorn-0.27.1 uvloop-0.19.0 vllm-0.3.0 watchfiles-0.21.0 websockets-12.0 xformers-0.0.23.post1\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install vllm " ] }, { "cell_type": "code", "execution_count": 2, "id": "b99c2bf2-4ed5-4514-a03c-4ca33a9e1060", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandas in /usr/local/lib/python3.11/site-packages (2.2.0)\n", "Requirement already satisfied: numpy<2,>=1.23.2 in /usr/local/lib/python3.11/site-packages (from pandas) (1.26.4)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/site-packages (from pandas) (2024.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n" ] } ], "source": [ "!pip install pandas" ] }, { "cell_type": "code", "execution_count": 5, "id": "75a61ec8-e440-42b0-8b4d-e3cb05841b71", "metadata": {}, "outputs": [], "source": [ "import torch\n", "import numpy as np\n", "import pandas as pd\n", "import json\n", "import os\n", "from vllm import LLM, SamplingParams" ] }, { "cell_type": "code", "execution_count": 7, "id": "7d29ba48-d4b0-4f24-8f88-560d9bed100c", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO 02-15 18:03:33 llm_engine.py:72] Initializing an LLM engine with config: model='aissatoubalde/lab', tokenizer='microsoft/phi-2', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, seed=0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO 02-15 18:03:33 weight_utils.py:164] Using model weights format ['*.safetensors']\n" ] }, { "ename": "KeyError", "evalue": "'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m 
base_model_name='microsoft/phi-2'\n      3 merged_peft_model_name=\"aissatoubalde/lab\"\n----> 4 llm = LLM(model=merged_peft_model_name, tokenizer=base_model_name)",
"File /usr/local/lib/python3.11/site-packages/vllm/entrypoints/llm.py:109, in LLM.__init__(...)\n--> 109 self.llm_engine = LLMEngine.from_engine_args(engine_args)",
"File /usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:356, in LLMEngine.from_engine_args(cls, engine_args)\n--> 356 engine = cls(*engine_configs, placement_group, log_stats=not engine_args.disable_log_stats)",
"File /usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:111, in LLMEngine.__init__(...)\n--> 111 self._init_workers()",
"File /usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:152, in LLMEngine._init_workers(self)\n--> 152 self._run_workers(\"load_model\")",
"File /usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:983, in LLMEngine._run_workers(self, method, driver_args, driver_kwargs, max_concurrent_workers, *args, **kwargs)\n--> 983 driver_worker_output = getattr(self.driver_worker, method)(*driver_args, **driver_kwargs)",
"File /usr/local/lib/python3.11/site-packages/vllm/worker/worker.py:92, in Worker.load_model(self)\n---> 92 self.model_runner.load_model()",
"File /usr/local/lib/python3.11/site-packages/vllm/worker/model_runner.py:75, in ModelRunner.load_model(self)\n---> 75 self.model = get_model(self.model_config, self.lora_config)",
"File /usr/local/lib/python3.11/site-packages/vllm/model_executor/model_loader.py:88, in get_model(model_config, lora_config)\n---> 88 model.load_weights(model_config.model, model_config.download_dir, model_config.load_format, model_config.revision)",
"File /usr/local/lib/python3.11/site-packages/vllm/model_executor/models/phi.py:302, in PhiForCausalLM.load_weights(self, model_name_or_path, cache_dir, load_format, revision)\n--> 302 param = params_dict[name]",
"\u001b[0;31mKeyError\u001b[0m: 'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'" ] } ], "source": [ "os.environ['CUDA_VISIBLE_DEVICES']=\"0\"\n", "base_model_name='microsoft/phi-2'\n", "merged_peft_model_name=\"aissatoubalde/lab\"\n", "llm = LLM(model=merged_peft_model_name, tokenizer=base_model_name)" ] },
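{ "cell_type": "markdown", "id": "9f1c2d3e-merge-note", "metadata": {}, "source": [ "The `KeyError: 'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'` above means that the checkpoint at `aissatoubalde/lab` still contains raw PEFT/LoRA adapter tensors (`lora_A`/`lora_B`) rather than merged weights; vLLM 0.3.0 only loads plain `PhiForCausalLM` weights, so `PhiForCausalLM.load_weights` fails on the first adapter tensor name it meets.\n", "The next cell is a minimal, untested sketch of the usual fix: merge the adapter into the base model with `peft` and save the merged weights, then point `LLM(...)` at the merged checkpoint. It assumes `aissatoubalde/lab` is a LoRA adapter trained on top of `microsoft/phi-2`, that `peft` is installed, and it writes to a hypothetical local path `./phi-2-merged`." ] },
{ "cell_type": "code", "execution_count": null, "id": "2b7e4a90-merge-sketch", "metadata": {}, "outputs": [], "source": [ "# Minimal sketch (not run here): merge the LoRA adapter into the base model so vLLM can load plain weights.\n", "# Assumes 'aissatoubalde/lab' is a PEFT/LoRA adapter for 'microsoft/phi-2' and that peft is installed;\n", "# './phi-2-merged' is a hypothetical output path.\n", "import torch\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from peft import PeftModel\n", "\n", "base = AutoModelForCausalLM.from_pretrained('microsoft/phi-2', torch_dtype=torch.float16)\n", "adapter = PeftModel.from_pretrained(base, 'aissatoubalde/lab')\n", "merged = adapter.merge_and_unload()  # folds the LoRA deltas into the base weights\n", "\n", "merged.save_pretrained('./phi-2-merged')\n", "AutoTokenizer.from_pretrained('microsoft/phi-2').save_pretrained('./phi-2-merged')\n", "\n", "# vLLM can then load the merged checkpoint directly, e.g.:\n", "# llm = LLM(model='./phi-2-merged')\n", "# sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128)\n", "# outputs = llm.generate(['Explain what merge_and_unload does.'], sampling_params)" ] },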
(ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }