File size: 590 Bytes
78e119e
8b64a94
690145e
81f3106
 
 
 
 
 
78e119e
 
 
8b64a94
 
a2415d5
0a57fdc
8b64a94
 
963cfae
63464ea
8b64a94
63464ea
0a57fdc
63464ea
8b64a94
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from fastapi import FastAPI
from typing import List
from vllm import LLM, SamplingParams
import os
from dotenv import load_dotenv

# Load environment variables (e.g. HUGGINGFACE_TOKEN) from a local .env file.
load_dotenv()

# NOTE(review): `token` is read but never used anywhere visible below —
# presumably the HuggingFace libraries pick the token up from the environment
# on their own; confirm, or pass it explicitly where needed. Also note that
# os.environ.get() returns None when the variable is unset, despite the
# `str` annotation.
token: str = os.environ.get("HUGGINGFACE_TOKEN")

# FastAPI application exposing the /llm_inference endpoint defined below.
app = FastAPI()

# Cache of already-loaded vLLM engines, keyed by model name. Building an
# LLM engine is extremely expensive (weights download/load + GPU memory),
# so it must not happen on every request.
_llm_cache: dict = {}


def _get_llm(model: str) -> LLM:
    """Return a cached vLLM engine for *model*, constructing it on first use."""
    if model not in _llm_cache:
        _llm_cache[model] = LLM(model=model)
    return _llm_cache[model]


@app.get("/llm_inference")
def read_root(
        prompt: str,
        model: str = "mistralai/Mistral-7B-v0.1",
        temperature: float = 0.,
        max_tokens: int = 1024) -> List[str]:
    """Generate a completion for *prompt* using the requested model.

    Args:
        prompt: Text to complete.
        model: HuggingFace model id; defaults to Mistral-7B-v0.1.
        temperature: Sampling temperature (0 = greedy decoding).
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The generated text for each submitted prompt (a single-element
        list here, since exactly one prompt is sent).
    """
    sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)

    # Bug fix: the original called LLM(model=model) here, re-loading the
    # whole model on every HTTP request.
    llm = _get_llm(model)

    response = llm.generate([prompt], sampling_params)

    # vLLM RequestOutput objects are not JSON-serializable by FastAPI;
    # return the plain generated strings instead (first candidate of each).
    return [output.outputs[0].text for output in response]