# Spaces: Runtime error  (page-status text captured with this file; not part of the program)
from typing import List

from fastapi import FastAPI
from vllm import LLM, SamplingParams
# Module-level FastAPI application instance.
app = FastAPI()
def read_root(
    prompts: List[str],
    model: str = "meta-llama/Llama-2-7b-hf",
    temperature: float = 0.,
    max_tokens: int = 1024,
) -> List:
    """Generate completions for ``prompts`` with a vLLM model.

    Args:
        prompts: Prompt strings, passed to ``LLM.generate`` unchanged.
        model: Model identifier handed to ``LLM(model=...)``.
        temperature: Sampling temperature forwarded to ``SamplingParams``.
        max_tokens: Token limit forwarded to ``SamplingParams``.

    Returns:
        The value returned by ``LLM.generate`` for the given prompts,
        unmodified.
    """
    # NOTE(review): no route decorator is visible on this function, so in the
    # text shown here it is never registered on `app` -- confirm against the
    # deployed file before relying on it as an endpoint.
    sampling_params = SamplingParams(
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # NOTE(review): a fresh LLM is constructed on every call. Reusing one
    # instance per model would avoid repeated construction, but changes the
    # object's lifetime, so it is left as-is here -- TODO confirm intent.
    llm = LLM(model=model)

    return llm.generate(prompts, sampling_params)