Update endpoint to make it more generic
- Dockerfile +2 -2
- main.py +11 -12
- requirements.txt +2 -2
Dockerfile
CHANGED
@@ -1,8 +1,6 @@
 # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
 # you will also find guides on how best to write your Dockerfile
 
-FROM python:3.9
-
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
 
@@ -19,6 +17,8 @@ WORKDIR $HOME/app
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
+FROM python:3.10.9
+
 WORKDIR /code
 
 COPY ./requirements.txt /code/requirements.txt
main.py
CHANGED
@@ -1,21 +1,20 @@
 from fastapi import FastAPI
+from typing import List
 from vllm import LLM, SamplingParams
 
 app = FastAPI()
 
 
-@app.get("/")
-def read_root(
-
-
-
-
-
-]
-    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+@app.get("/llm_inference")
+def read_root(
+    prompts: List[str],
+    model: str = "meta-llama/Llama-2-7b-hf",
+    temperature: float = 0.,
+    max_tokens: int = 1024) -> List:
+    sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)
 
-    llm = LLM(model=
+    llm = LLM(model=model)
 
-
+    response = llm.generate(prompts, sampling_params)
 
-    return
+    return response
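A minimal client sketch for the new endpoint follows. The base URL is hypothetical (substitute the actual Space URL). Note that FastAPI interprets a bare List[str] parameter as a JSON request body unless it is declared with Query(...), so the prompt list travels in the body here even though the route is a GET.

import requests

BASE_URL = "http://localhost:7860"  # hypothetical; replace with the Space URL

response = requests.get(
    f"{BASE_URL}/llm_inference",
    # scalar parameters become query-string arguments
    params={
        "model": "meta-llama/Llama-2-7b-hf",  # default from the diff above
        "temperature": 0.0,
        "max_tokens": 64,
    },
    # the bare List[str] "prompts" parameter is read from the JSON body
    json=["Hello, my name is", "The capital of France is"],
)
print(response.json())

Two caveats on this shape: LLM(model=model) is constructed inside the handler, so the model weights are reloaded on every request, and llm.generate(...) returns vLLM RequestOutput objects, which FastAPI may not be able to JSON-encode directly; returning e.g. [out.outputs[0].text for out in response] would be a safer payload.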
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-fastapi==0.74
-requests==2.27
+fastapi==0.74.1
+requests==2.27.1
 uvicorn[standard]==0.17.6
 vllm==0.2.2