# LLM_endpoint/schema.py
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
import logging
from typing import (
    List,
    Optional,
    Literal,
)
# GGUF checkpoints available to the endpoint, keyed by the short names
# accepted in ChatRequest.model below. Each entry is passed directly
# to hf_hub_download as keyword arguments.
MODEL_ARGS = {
    "llama3.2": dict(
        repo_id="hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF",
        filename="llama-3.2-3b-instruct-q8_0.gguf",
    ),
    "falcon-mamba": dict(
        repo_id="bartowski/falcon-mamba-7b-GGUF",
        filename="falcon-mamba-7b-Q4_K_M.gguf",
    ),
    "mistral-nemo": dict(
        repo_id="lmstudio-community/Mistral-Nemo-Instruct-2407-GGUF",
        filename="Mistral-Nemo-Instruct-2407-Q4_K_M.gguf",
    ),
}
logger = logging.getLogger("uvicorn.error")

# Pre-fetch every model at import time so the first request doesn't block on
# a download; hf_hub_download is effectively a no-op when the file is cached.
for model_arg in MODEL_ARGS.values():
    logger.info(f"Checking for {model_arg['repo_id']}")
    hf_hub_download(**model_arg)
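
# A minimal sketch of how server code elsewhere in this repo might load one
# of these checkpoints with llama-cpp-python (hypothetical: the chosen model
# and n_ctx value are assumptions, not part of this file). hf_hub_download
# returns the local path of the cached file:
#
#     from llama_cpp import Llama
#
#     model_path = hf_hub_download(**MODEL_ARGS["llama3.2"])
#     llm = Llama(model_path=model_path, n_ctx=8192)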

class Message(BaseModel):
    """A single chat turn, e.g. role="user" or role="assistant"."""

    role: str
    content: str

class ChatRequest(BaseModel):
    """Request body for the chat endpoint: conversation history, model choice,
    and llama.cpp-style sampling parameters."""

    chat_history: List[Message]
    model: Literal["llama3.2", "falcon-mamba", "mistral-nemo"] = "llama3.2"
    max_tokens: Optional[int] = 65536
    # Sampling parameters.
    temperature: float = 0.8
    top_p: float = 0.95
    min_p: float = 0.05
    typical_p: float = 1.0
    frequency_penalty: float = 0.0
    presence_penalty: float = 0.0
    repeat_penalty: float = 1.0
    top_k: int = 40
    seed: Optional[int] = None
    tfs_z: float = 1.0
    # Mirostat adaptive sampling (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).
    mirostat_mode: int = 0
    mirostat_tau: float = 5.0
    mirostat_eta: float = 0.1
    # logprobs: Optional[int] = None
    # logit_bias: Optional[Dict[str, float]] = None
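

# A minimal usage sketch, guarded so it never runs on import (the payload is
# illustrative only; this assumes pydantic v1's .json(), which still works on
# v2 but is deprecated there in favor of .model_dump_json()):
if __name__ == "__main__":
    request = ChatRequest(
        chat_history=[Message(role="user", content="Hello!")],
        model="llama3.2",
        temperature=0.7,
    )
    print(request.json())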