import logging
from typing import (
    List,
    Literal,
    Optional,
)

from huggingface_hub import hf_hub_download
from pydantic import BaseModel

# Quantized GGUF checkpoints to fetch from the Hugging Face Hub.
MODEL_ARGS = {
    "llama3.2": dict(
        repo_id="hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF",
        filename="llama-3.2-3b-instruct-q8_0.gguf",
    ),
    "falcon-mamba": dict(
        repo_id="bartowski/falcon-mamba-7b-GGUF",
        filename="falcon-mamba-7b-Q4_K_M.gguf",
    ),
    "mistral-nemo": dict(
        repo_id="lmstudio-community/Mistral-Nemo-Instruct-2407-GGUF",
        filename="Mistral-Nemo-Instruct-2407-Q4_K_M.gguf",
    ),
}

# Log through uvicorn's error logger so messages show up in the server output.
logger = logging.getLogger("uvicorn.error")

# Pre-fetch every model at import time; hf_hub_download skips the download
# when the file is already present in the local cache.
for model_arg in MODEL_ARGS.values():
    logger.info(f"Checking for {model_arg['repo_id']}")
    hf_hub_download(**model_arg)
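
# Note (an assumption beyond this file): hf_hub_download returns the local
# cache path, so the loop above could also record where each GGUF lives for
# later loading, e.g.:
#
#     MODEL_PATHS = {name: hf_hub_download(**args) for name, args in MODEL_ARGS.items()}
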

class Message(BaseModel):
    role: str  # chat role, e.g. "user" or "assistant"
    content: str

class ChatRequest(BaseModel):
    chat_history: List[Message]
    model: Literal["llama3.2", "falcon-mamba", "mistral-nemo"] = "llama3.2"
    max_tokens: Optional[int] = 65536
    temperature: float = 0.8  # sampling temperature; higher is more random
    top_p: float = 0.95  # nucleus sampling cutoff
    min_p: float = 0.05  # drop tokens below this fraction of the top probability
    typical_p: float = 1.0  # locally typical sampling (1.0 disables it)
    frequency_penalty: float = 0.0
    presence_penalty: float = 0.0
    repeat_penalty: float = 1.0  # 1.0 disables the repetition penalty
    top_k: int = 40  # sample only from the k most likely tokens
    seed: Optional[int] = None  # None for a nondeterministic seed
    tfs_z: float = 1.0  # tail-free sampling (1.0 disables it)
    mirostat_mode: int = 0  # 0 = off, 1 = Mirostat, 2 = Mirostat 2.0
    mirostat_tau: float = 5.0  # Mirostat target entropy
    mirostat_eta: float = 0.1  # Mirostat learning rate
    # logprobs: Optional[int] = None
    # logit_bias: Optional[Dict[str, float]] = None
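

# --- Hypothetical usage sketch (an assumption, not part of the original file) ---
# Shows how an incoming JSON body could be validated into a ChatRequest and
# split into sampling kwargs for a llama.cpp-style backend. `payload` and
# `sampling_kwargs` are illustrative names introduced here.
if __name__ == "__main__":
    payload = {
        "chat_history": [{"role": "user", "content": "Hello!"}],
        "model": "llama3.2",
        "temperature": 0.7,
    }
    request = ChatRequest(**payload)  # raises ValidationError on bad input
    # Everything except the messages and model name maps onto sampling options.
    # .dict() is the pydantic v1 API; prefer .model_dump() on pydantic v2.
    sampling_kwargs = request.dict(exclude={"chat_history", "model"})
    print(request.model, sampling_kwargs)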