File size: 36,931 Bytes
489dc47
 
 
 
e9cc5c3
489dc47
e9cc5c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe490e0
e9cc5c3
 
fe490e0
e9cc5c3
 
 
 
 
 
 
 
 
 
 
fe490e0
e9cc5c3
 
 
 
 
 
 
 
 
fe490e0
e9cc5c3
 
 
 
 
 
 
 
 
 
 
 
 
fe490e0
e9cc5c3
 
 
 
fe490e0
e9cc5c3
 
 
 
fe490e0
e9cc5c3
 
 
 
 
 
 
 
fe490e0
 
e9cc5c3
 
 
 
 
 
 
 
 
 
 
 
 
 
489dc47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6cb144df-3103-4fbe-96cf-5bc7e7e119c6",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting vllm\n",
      "  Downloading vllm-0.3.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.4 kB)\n",
      "Collecting ninja (from vllm)\n",
      "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n",
      "Requirement already satisfied: psutil in /usr/local/lib/python3.11/site-packages (from vllm) (5.9.8)\n",
      "Collecting ray>=2.9 (from vllm)\n",
      "  Downloading ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl.metadata (13 kB)\n",
      "Collecting sentencepiece (from vllm)\n",
      "  Downloading sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.11/site-packages (from vllm) (1.26.4)\n",
      "Collecting torch==2.1.2 (from vllm)\n",
      "  Downloading torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)\n",
      "Requirement already satisfied: transformers>=4.37.0 in /usr/local/lib/python3.11/site-packages (from vllm) (4.37.2)\n",
      "Collecting xformers==0.0.23.post1 (from vllm)\n",
      "  Downloading xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.0 kB)\n",
      "Requirement already satisfied: fastapi in /usr/local/lib/python3.11/site-packages (from vllm) (0.88.0)\n",
      "Collecting uvicorn[standard] (from vllm)\n",
      "  Downloading uvicorn-0.27.1-py3-none-any.whl.metadata (6.3 kB)\n",
      "Collecting pydantic>=2.0 (from vllm)\n",
      "  Downloading pydantic-2.6.1-py3-none-any.whl.metadata (83 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.5/83.5 kB\u001b[0m \u001b[31m272.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting aioprometheus[starlette] (from vllm)\n",
      "  Downloading aioprometheus-23.12.0-py3-none-any.whl.metadata (9.8 kB)\n",
      "Collecting pynvml==11.5.0 (from vllm)\n",
      "  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m425.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.13.1)\n",
      "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (4.9.0)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (1.12)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.2.1)\n",
      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (3.1.3)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (2023.10.0)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.0.106)\n",
      "Collecting nvidia-nccl-cu12==2.18.1 (from torch==2.1.2->vllm)\n",
      "  Downloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl.metadata (1.8 kB)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/site-packages (from torch==2.1.2->vllm) (12.1.105)\n",
      "Collecting triton==2.1.0 (from torch==2.1.2->vllm)\n",
      "  Downloading triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch==2.1.2->vllm) (12.3.101)\n",
      "Collecting annotated-types>=0.4.0 (from pydantic>=2.0->vllm)\n",
      "  Downloading annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)\n",
      "Collecting pydantic-core==2.16.2 (from pydantic>=2.0->vllm)\n",
      "  Downloading pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)\n",
      "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (8.1.7)\n",
      "Requirement already satisfied: jsonschema in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (4.21.1)\n",
      "Collecting msgpack<2.0.0,>=1.0.0 (from ray>=2.9->vllm)\n",
      "  Downloading msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)\n",
      "Requirement already satisfied: packaging in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (23.2)\n",
      "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (4.25.2)\n",
      "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (6.0.1)\n",
      "Requirement already satisfied: aiosignal in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (1.3.1)\n",
      "Requirement already satisfied: frozenlist in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (1.4.1)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.11/site-packages (from ray>=2.9->vllm) (2.31.0)\n",
      "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.20.3)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (2023.12.25)\n",
      "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.15.2)\n",
      "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (0.4.2)\n",
      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/site-packages (from transformers>=4.37.0->vllm) (4.66.2)\n",
      "Collecting orjson (from aioprometheus[starlette]->vllm)\n",
      "  Downloading orjson-3.9.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m107.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting quantile-python>=1.1 (from aioprometheus[starlette]->vllm)\n",
      "  Downloading quantile-python-1.1.tar.gz (2.9 kB)\n",
      "  Installing build dependencies ... \u001b[?25ldone\n",
      "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
      "\u001b[?25h  Installing backend dependencies ... \u001b[?25ldone\n",
      "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25hRequirement already satisfied: starlette>=0.14.2 in /usr/local/lib/python3.11/site-packages (from aioprometheus[starlette]->vllm) (0.22.0)\n",
      "INFO: pip is looking at multiple versions of fastapi to determine which version is compatible with other requirements. This could take a while.\n",
      "Collecting fastapi (from vllm)\n",
      "  Downloading fastapi-0.109.2-py3-none-any.whl.metadata (25 kB)\n",
      "Collecting starlette>=0.14.2 (from aioprometheus[starlette]->vllm)\n",
      "  Downloading starlette-0.36.3-py3-none-any.whl.metadata (5.9 kB)\n",
      "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.11/site-packages (from uvicorn[standard]->vllm) (0.14.0)\n",
      "Collecting httptools>=0.5.0 (from uvicorn[standard]->vllm)\n",
      "  Downloading httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n",
      "Collecting python-dotenv>=0.13 (from uvicorn[standard]->vllm)\n",
      "  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n",
      "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]->vllm)\n",
      "  Downloading uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
      "Collecting watchfiles>=0.13 (from uvicorn[standard]->vllm)\n",
      "  Downloading watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
      "Collecting websockets>=10.4 (from uvicorn[standard]->vllm)\n",
      "  Downloading websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
      "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.11/site-packages (from starlette>=0.14.2->aioprometheus[starlette]->vllm) (4.2.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/site-packages (from jinja2->torch==2.1.2->vllm) (2.1.5)\n",
      "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (23.2.0)\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (2023.12.1)\n",
      "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (0.33.0)\n",
      "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/site-packages (from jsonschema->ray>=2.9->vllm) (0.17.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2.1.1)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (3.6)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2.2.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/site-packages (from requests->ray>=2.9->vllm) (2024.2.2)\n",
      "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.11/site-packages (from sympy->torch==2.1.2->vllm) (1.3.0)\n",
      "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/site-packages (from anyio<5,>=3.4.0->starlette>=0.14.2->aioprometheus[starlette]->vllm) (1.3.0)\n",
      "Downloading vllm-0.3.0-cp311-cp311-manylinux1_x86_64.whl (38.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.1/38.1 MB\u001b[0m \u001b[31m152.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl (670.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m177.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl (213.0 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 MB\u001b[0m \u001b[31m236.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl (209.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.8/209.8 MB\u001b[0m \u001b[31m278.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.2/89.2 MB\u001b[0m \u001b[31m278.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading pydantic-2.6.1-py3-none-any.whl (394 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m394.8/394.8 kB\u001b[0m \u001b[31m578.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m604.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading ray-2.9.2-cp311-cp311-manylinux2014_x86_64.whl (65.4 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.4/65.4 MB\u001b[0m \u001b[31m199.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading fastapi-0.109.2-py3-none-any.whl (92 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.1/92.1 kB\u001b[0m \u001b[31m384.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m571.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m625.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading annotated_types-0.6.0-py3-none-any.whl (12 kB)\n",
      "Downloading httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.5/318.5 kB\u001b[0m \u001b[31m610.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (557 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m558.0/558.0 kB\u001b[0m \u001b[31m604.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
      "Downloading starlette-0.36.3-py3-none-any.whl (71 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m404.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m98.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m621.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (130 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.9/130.9 kB\u001b[0m \u001b[31m492.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading aioprometheus-23.12.0-py3-none-any.whl (31 kB)\n",
      "Downloading orjson-3.9.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.0/139.0 kB\u001b[0m \u001b[31m505.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading uvicorn-0.27.1-py3-none-any.whl (60 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m506.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hBuilding wheels for collected packages: quantile-python\n",
      "  Building wheel for quantile-python (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for quantile-python: filename=quantile_python-1.1-py3-none-any.whl size=3444 sha256=f2b9e6c120ce03904d5a6fcbccc70a62a4a05bb1352a8f87d02ddbafdf252601\n",
      "  Stored in directory: /tmp/pip-ephem-wheel-cache-jv2xdihe/wheels/67/a2/17/29e7169adf03a7e44b922abb6a42c2c1b0fda11f7bfbdb24a2\n",
      "Successfully built quantile-python\n",
      "Installing collected packages: sentencepiece, quantile-python, ninja, websockets, uvloop, uvicorn, triton, python-dotenv, pynvml, pydantic-core, orjson, nvidia-nccl-cu12, msgpack, httptools, annotated-types, watchfiles, starlette, pydantic, aioprometheus, torch, fastapi, xformers, ray, vllm\n",
      "  Attempting uninstall: triton\n",
      "    Found existing installation: triton 2.2.0\n",
      "    Uninstalling triton-2.2.0:\n",
      "      Successfully uninstalled triton-2.2.0\n",
      "  Attempting uninstall: nvidia-nccl-cu12\n",
      "    Found existing installation: nvidia-nccl-cu12 2.19.3\n",
      "    Uninstalling nvidia-nccl-cu12-2.19.3:\n",
      "      Successfully uninstalled nvidia-nccl-cu12-2.19.3\n",
      "  Attempting uninstall: starlette\n",
      "    Found existing installation: starlette 0.22.0\n",
      "    Uninstalling starlette-0.22.0:\n",
      "      Successfully uninstalled starlette-0.22.0\n",
      "  Attempting uninstall: pydantic\n",
      "    Found existing installation: pydantic 1.10.14\n",
      "    Uninstalling pydantic-1.10.14:\n",
      "      Successfully uninstalled pydantic-1.10.14\n",
      "  Attempting uninstall: torch\n",
      "    Found existing installation: torch 2.2.0\n",
      "    Uninstalling torch-2.2.0:\n",
      "      Successfully uninstalled torch-2.2.0\n",
      "  Attempting uninstall: fastapi\n",
      "    Found existing installation: fastapi 0.88.0\n",
      "    Uninstalling fastapi-0.88.0:\n",
      "      Successfully uninstalled fastapi-0.88.0\n",
      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "modal 0.57.47 requires synchronicity~=0.6.2, which is not installed.\n",
      "modal 0.57.47 requires grpclib==0.4.7, but you have grpclib 0.4.3 which is incompatible.\n",
      "modal 0.57.47 requires typer~=0.9.0, but you have typer 0.6.1 which is incompatible.\u001b[0m\u001b[31m\n",
      "\u001b[0mSuccessfully installed aioprometheus-23.12.0 annotated-types-0.6.0 fastapi-0.109.2 httptools-0.6.1 msgpack-1.0.7 ninja-1.11.1.1 nvidia-nccl-cu12-2.18.1 orjson-3.9.14 pydantic-2.6.1 pydantic-core-2.16.2 pynvml-11.5.0 python-dotenv-1.0.1 quantile-python-1.1 ray-2.9.2 sentencepiece-0.1.99 starlette-0.36.3 torch-2.1.2 triton-2.1.0 uvicorn-0.27.1 uvloop-0.19.0 vllm-0.3.0 watchfiles-0.21.0 websockets-12.0 xformers-0.0.23.post1\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install vllm "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b99c2bf2-4ed5-4514-a03c-4ca33a9e1060",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pandas in /usr/local/lib/python3.11/site-packages (2.2.0)\n",
      "Requirement already satisfied: numpy<2,>=1.23.2 in /usr/local/lib/python3.11/site-packages (from pandas) (1.26.4)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/site-packages (from pandas) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/site-packages (from pandas) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/site-packages (from pandas) (2024.1)\n",
      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "75a61ec8-e440-42b0-8b4d-e3cb05841b71",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import json\n",
    "import os\n",
    "from vllm import LLM, SamplingParams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7d29ba48-d4b0-4f24-8f88-560d9bed100c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO 02-15 18:03:33 llm_engine.py:72] Initializing an LLM engine with config: model='aissatoubalde/lab', tokenizer='microsoft/phi-2', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, seed=0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO 02-15 18:03:33 weight_utils.py:164] Using model weights format ['*.safetensors']\n"
     ]
    },
    {
     "ename": "KeyError",
     "evalue": "'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m      2\u001b[0m base_model_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmicrosoft/phi-2\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m      3\u001b[0m merged_peft_model_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maissatoubalde/lab\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m llm \u001b[38;5;241m=\u001b[39m \u001b[43mLLM\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerged_peft_model_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbase_model_name\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/entrypoints/llm.py:109\u001b[0m, in \u001b[0;36mLLM.__init__\u001b[0;34m(self, model, tokenizer, tokenizer_mode, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, enforce_eager, max_context_len_to_capture, disable_custom_all_reduce, **kwargs)\u001b[0m\n\u001b[1;32m     90\u001b[0m     kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable_log_stats\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m     91\u001b[0m engine_args \u001b[38;5;241m=\u001b[39m EngineArgs(\n\u001b[1;32m     92\u001b[0m     model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m     93\u001b[0m     tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    107\u001b[0m     \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m    108\u001b[0m )\n\u001b[0;32m--> 109\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine \u001b[38;5;241m=\u001b[39m \u001b[43mLLMEngine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_engine_args\u001b[49m\u001b[43m(\u001b[49m\u001b[43mengine_args\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    110\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter \u001b[38;5;241m=\u001b[39m Counter()\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:356\u001b[0m, in \u001b[0;36mLLMEngine.from_engine_args\u001b[0;34m(cls, engine_args)\u001b[0m\n\u001b[1;32m    354\u001b[0m placement_group \u001b[38;5;241m=\u001b[39m initialize_cluster(parallel_config)\n\u001b[1;32m    355\u001b[0m \u001b[38;5;66;03m# Create the LLM engine.\u001b[39;00m\n\u001b[0;32m--> 356\u001b[0m engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mengine_configs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    357\u001b[0m \u001b[43m             \u001b[49m\u001b[43mplacement_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    358\u001b[0m \u001b[43m             \u001b[49m\u001b[43mlog_stats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mengine_args\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdisable_log_stats\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    359\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m engine\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:111\u001b[0m, in \u001b[0;36mLLMEngine.__init__\u001b[0;34m(self, model_config, cache_config, parallel_config, scheduler_config, lora_config, placement_group, log_stats)\u001b[0m\n\u001b[1;32m    109\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_workers_ray(placement_group)\n\u001b[1;32m    110\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 111\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_init_workers\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    113\u001b[0m \u001b[38;5;66;03m# Profile the memory usage and initialize the cache.\u001b[39;00m\n\u001b[1;32m    114\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_cache()\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:152\u001b[0m, in \u001b[0;36mLLMEngine._init_workers\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    140\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdriver_worker \u001b[38;5;241m=\u001b[39m Worker(\n\u001b[1;32m    141\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config,\n\u001b[1;32m    142\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparallel_config,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    149\u001b[0m     is_driver_worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m    150\u001b[0m )\n\u001b[1;32m    151\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_workers(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minit_model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 152\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_workers\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mload_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/engine/llm_engine.py:983\u001b[0m, in \u001b[0;36mLLMEngine._run_workers\u001b[0;34m(self, method, driver_args, driver_kwargs, max_concurrent_workers, *args, **kwargs)\u001b[0m\n\u001b[1;32m    980\u001b[0m     driver_kwargs \u001b[38;5;241m=\u001b[39m kwargs\n\u001b[1;32m    982\u001b[0m \u001b[38;5;66;03m# Start the driver worker after all the ray workers.\u001b[39;00m\n\u001b[0;32m--> 983\u001b[0m driver_worker_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    984\u001b[0m \u001b[43m                               \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdriver_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdriver_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    986\u001b[0m \u001b[38;5;66;03m# Get the results of the ray workers.\u001b[39;00m\n\u001b[1;32m    987\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworkers:\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/worker/worker.py:92\u001b[0m, in \u001b[0;36mWorker.load_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     91\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 92\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/worker/model_runner.py:75\u001b[0m, in \u001b[0;36mModelRunner.load_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     74\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 75\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mget_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_config\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlora_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     77\u001b[0m     vocab_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mvocab_size\n\u001b[1;32m     79\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlora_config:\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/model_executor/model_loader.py:88\u001b[0m, in \u001b[0;36mget_model\u001b[0;34m(model_config, lora_config)\u001b[0m\n\u001b[1;32m     85\u001b[0m         initialize_dummy_weights(model)\n\u001b[1;32m     86\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     87\u001b[0m         \u001b[38;5;66;03m# Load the weights from the cached or downloaded files.\u001b[39;00m\n\u001b[0;32m---> 88\u001b[0m         \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_weights\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     89\u001b[0m \u001b[43m                           \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_format\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\u001b[38;5;241m.\u001b[39meval()\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/vllm/model_executor/models/phi.py:302\u001b[0m, in \u001b[0;36mPhiForCausalLM.load_weights\u001b[0;34m(self, model_name_or_path, cache_dir, load_format, revision)\u001b[0m\n\u001b[1;32m    299\u001b[0m     \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m    300\u001b[0m \u001b[38;5;66;03m# pylint: disable=E1136\u001b[39;00m\n\u001b[0;32m--> 302\u001b[0m param \u001b[38;5;241m=\u001b[39m \u001b[43mparams_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m    303\u001b[0m weight_loader \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(param, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mweight_loader\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    304\u001b[0m                         default_weight_loader)\n\u001b[1;32m    305\u001b[0m weight_loader(param, loaded_weight)\n",
      "\u001b[0;31mKeyError\u001b[0m: 'base_model.model.model.layers.0.mlp.fc1.lora_A.weight'"
     ]
    }
   ],
   "source": [
    "os.environ['CUDA_VISIBLE_DEVICES']=\"0\"\n",
    "base_model_name='microsoft/phi-2'\n",
    "merged_peft_model_name=\"aissatoubalde/lab\"\n",
    "llm = LLM(model=merged_peft_model_name, tokenizer=base_model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f416b0ae-16bf-446a-86b1-97337a610c53",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}