Wolf369 committed on
Commit
81f3106
1 Parent(s): d064a1c
Files changed (1) hide show
  1. main.py +8 -2
main.py CHANGED
@@ -1,6 +1,12 @@
1
  from fastapi import FastAPI
2
  from typing import List
3
  from vllm import LLM, SamplingParams
 
 
 
 
 
 
4
 
5
  app = FastAPI()
6
 
@@ -8,10 +14,10 @@ app = FastAPI()
8
  @app.get("/llm_inference")
9
  def read_root(
10
  prompt: str,
11
- model: str = "meta-llama/Llama-2-7B-hf",
12
  temperature: float = 0.,
13
  max_tokens: int = 1024) -> List:
14
- sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)
15
 
16
  llm = LLM(model=model)
17
 
 
# Module prelude for the vLLM inference API: load configuration from the
# environment and construct the FastAPI application object.

# Standard library first, third-party second (PEP 8 import grouping).
import os
from typing import List, Optional

from dotenv import load_dotenv
from fastapi import FastAPI
from vllm import LLM, SamplingParams

# Pull variables from a local .env file into os.environ (no-op if the
# file is absent), so HUGGINGFACE_TOKEN can be supplied without exporting it.
load_dotenv()

# Hugging Face access token used to download gated models (e.g. meta-llama/*).
# os.environ.get returns None when the variable is unset, so Optional[str]
# is the accurate annotation — the bare `str` annotation was wrong.
token: Optional[str] = os.environ.get("HUGGINGFACE_TOKEN")

app = FastAPI()
12
 
 
14
  @app.get("/llm_inference")
15
  def read_root(
16
  prompt: str,
17
+ model: str = "meta-llama/Llama-2-7b-hf",
18
  temperature: float = 0.,
19
  max_tokens: int = 1024) -> List:
20
+ sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens, token=token)
21
 
22
  llm = LLM(model=model)
23