Wolf369 committed on
Commit 8b64a94
1 Parent(s): 49ae147

Update endpoint to make it more generic

Files changed (3)
  1. Dockerfile +2 -2
  2. main.py +11 -12
  3. requirements.txt +2 -2
Dockerfile CHANGED
@@ -1,8 +1,6 @@
  # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
  # you will also find guides on how best to write your Dockerfile

- FROM python:3.9
-
  # Set up a new user named "user" with user ID 1000
  RUN useradd -m -u 1000 user

@@ -19,6 +17,8 @@ WORKDIR $HOME/app
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
  COPY --chown=user . $HOME/app

+ FROM python:3.10.9
+
  WORKDIR /code

  COPY ./requirements.txt /code/requirements.txt
main.py CHANGED
@@ -1,21 +1,20 @@
  from fastapi import FastAPI
+ from typing import List
  from vllm import LLM, SamplingParams

  app = FastAPI()


- @app.get("/")
- def read_root():
-     prompts = [
-         "Hello, my name is",
-         "The president of the United States is",
-         "The capital of France is",
-         "The future of AI is",
-     ]
-     sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+ @app.get("/llm_inference")
+ def read_root(
+         prompts: List[str],
+         model: str = "meta-llama/Llama-2-7b-hf",
+         temperature: float = 0.,
+         max_tokens: int = 1024) -> List:
+     sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)

-     llm = LLM(model="facebook/opt-125m")
+     llm = LLM(model=model)

-     outputs = llm.generate(prompts, sampling_params)
+     response = llm.generate(prompts, sampling_params)

-     return {"outputs": outputs}
+     return response
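
For context, the reworked endpoint now takes the prompt list, model name, temperature, and token limit from the caller. Below is a minimal client-side sketch that is not part of this commit: it assumes the Space serves on localhost:7860 and that FastAPI reads the bare List[str] parameter from a JSON request body (its default for list parameters not declared with Query), while the scalar parameters travel as query-string values.

# Hypothetical client call against the updated /llm_inference endpoint;
# the URL, port, prompt text, and parameter values below are placeholders.
import requests

resp = requests.get(
    "http://localhost:7860/llm_inference",
    params={
        "model": "meta-llama/Llama-2-7b-hf",
        "temperature": 0.0,
        "max_tokens": 256,
    },
    json=["The capital of France is"],  # sent as the JSON body for `prompts`
)
print(resp.status_code, resp.text)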
requirements.txt CHANGED
@@ -1,4 +1,4 @@
- fastapi==0.74.*
- requests==2.27.*
+ fastapi==0.74.1
+ requests==2.27.1
  uvicorn[standard]==0.17.6
  vllm==0.2.2