Wolf369 committed on
Commit 8b64a94
1 Parent(s): 49ae147

Update endpoint to make it more generic

Files changed (3)
  1. Dockerfile +2 -2
  2. main.py +11 -12
  3. requirements.txt +2 -2
Dockerfile CHANGED
@@ -1,8 +1,6 @@
  # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
  # you will also find guides on how best to write your Dockerfile

- FROM python:3.9
-
  # Set up a new user named "user" with user ID 1000
  RUN useradd -m -u 1000 user

@@ -19,6 +17,8 @@ WORKDIR $HOME/app
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
  COPY --chown=user . $HOME/app

+ FROM python:3.10.9
+
  WORKDIR /code

  COPY ./requirements.txt /code/requirements.txt
main.py CHANGED
@@ -1,21 +1,20 @@
  from fastapi import FastAPI
+ from typing import List
  from vllm import LLM, SamplingParams

  app = FastAPI()


- @app.get("/")
- def read_root():
-     prompts = [
-         "Hello, my name is",
-         "The president of the United States is",
-         "The capital of France is",
-         "The future of AI is",
-     ]
-     sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+ @app.get("/llm_inference")
+ def read_root(
+         prompts: List[str],
+         model: str = "meta-llama/Llama-2-7b-hf",
+         temperature: float = 0.,
+         max_tokens: int = 1024) -> List:
+     sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)

-     llm = LLM(model="facebook/opt-125m")
+     llm = LLM(model=model)

-     outputs = llm.generate(prompts, sampling_params)
+     response = llm.generate(prompts, sampling_params)

-     return {"outputs": outputs}
+     return response
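
For context, the reworked endpoint now takes the prompt list, model name, temperature, and token limit from the caller. Below is a minimal client-side sketch that is not part of this commit: it assumes the Space serves on localhost:7860 and that FastAPI reads the bare List[str] parameter from a JSON request body (its default for list parameters not declared with Query), while the scalar parameters travel as query-string values.

# Hypothetical client call against the updated /llm_inference endpoint;
# the URL, port, prompt text, and parameter values below are placeholders.
import requests

resp = requests.get(
    "http://localhost:7860/llm_inference",
    params={
        "model": "meta-llama/Llama-2-7b-hf",
        "temperature": 0.0,
        "max_tokens": 256,
    },
    json=["The capital of France is"],  # sent as the JSON body for `prompts`
)
print(resp.status_code, resp.text)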
requirements.txt CHANGED
@@ -1,4 +1,4 @@
- fastapi==0.74.*
- requests==2.27.*
+ fastapi==0.74.1
+ requests==2.27.1
  uvicorn[standard]==0.17.6
  vllm==0.2.2