Wolf369 committed on
Commit
0a57fdc
1 Parent(s): ab9cab0
Files changed (1)
  1. main.py +2 -2
main.py CHANGED
@@ -13,13 +13,13 @@ app = FastAPI()
 @app.get("/llm_inference")
 def read_root(
     prompt: str,
-    model: str = "meta-llama/Llama-2-7b-chat-hf",
+    model: str = "mistralai/Mistral-7B-v0.1",
     temperature: float = 0.,
     max_tokens: int = 1024) -> List:
     sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)
 
     llm = LLM(model=model)
 
-    response = llm.generate([prompt], sampling_params, token=token)
+    response = llm.generate([prompt], sampling_params)
 
     return response
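For context, here is a minimal sketch of what the full `main.py` might look like after this commit. The imports and app setup are assumptions inferred from the hunk header (`app = FastAPI()`) and the identifiers used in the diff (`LLM`, `SamplingParams`, `List`); they are not shown in the changed lines themselves.

```python
# Sketch of main.py after this commit; surrounding imports and app setup
# are assumed, since the diff only shows the changed hunk.
from typing import List

from fastapi import FastAPI
from vllm import LLM, SamplingParams

app = FastAPI()


@app.get("/llm_inference")
def read_root(
    prompt: str,
    model: str = "mistralai/Mistral-7B-v0.1",
    temperature: float = 0.,
    max_tokens: int = 1024) -> List:
    # Build sampling parameters from the query arguments.
    sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)

    # Load the requested model (note: this instantiates a new LLM per request).
    llm = LLM(model=model)

    # vLLM's LLM.generate takes a list of prompts plus sampling params;
    # the removed token=token keyword is not among its documented parameters.
    response = llm.generate([prompt], sampling_params)

    return response
```

Once the app is served (e.g. with uvicorn), the endpoint would be called with a GET request such as `/llm_inference?prompt=Hello&max_tokens=64`, with the new Mistral-7B default used when no `model` query parameter is given.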