|
from typing import Literal, Optional, List, Dict, Any, Union |
|
|
|
import time |
|
|
|
import shortuuid |
|
from pydantic import BaseModel, Field |
|
|
|
|
|
class ErrorResponse(BaseModel):
    """Error payload returned to API clients in OpenAI's error format."""

    object: str = Field(default="error")  # fixed discriminator for clients
    message: str  # human-readable description of the failure
    code: int  # numeric error code
|
|
|
|
|
class ModelPermission(BaseModel):
    """Permission entry attached to a model card (OpenAI "model_permission" object)."""

    # Unique id, e.g. "modelperm-<random>"; generated fresh per instance.
    id: str = Field(default_factory=lambda: f"modelperm-{shortuuid.random()}")
    object: str = "model_permission"
    # Unix timestamp (seconds) captured at instantiation time.
    created: int = Field(default_factory=lambda: int(time.time()))
    allow_create_engine: bool = False
    allow_sampling: bool = True
    allow_logprobs: bool = True
    allow_search_indices: bool = True
    allow_view: bool = True
    allow_fine_tuning: bool = False
    organization: str = "*"
    group: Optional[str] = None
    # Fixed: was annotated `str` while holding a bool default; is_blocking is
    # a boolean in the OpenAI schema.
    is_blocking: bool = False
|
|
|
|
|
class ModelCard(BaseModel):
    """Metadata describing one served model (OpenAI "model" object)."""

    id: str
    object: str = "model"
    # Unix timestamp (seconds) captured at instantiation time.
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "fastchat"
    root: Optional[str] = None
    parent: Optional[str] = None
    # Use an explicit factory instead of a shared `[]` class-level default:
    # unambiguous "fresh empty list per instance", the pydantic-documented idiom.
    permission: List[ModelPermission] = Field(default_factory=list)
|
|
|
|
|
class ModelList(BaseModel):
    """Response envelope for the model-listing endpoint (OpenAI "list" object)."""

    object: str = "list"
    # Explicit factory instead of a shared `[]` class-level default.
    data: List[ModelCard] = Field(default_factory=list)
|
|
|
|
|
class UsageInfo(BaseModel):
    """Token accounting attached to completion responses."""

    prompt_tokens: int = Field(default=0)
    total_tokens: int = Field(default=0)
    completion_tokens: Optional[int] = Field(default=0)
|
|
|
|
|
class APIChatCompletionRequest(BaseModel):
    """Request body for the API chat-completion endpoint.

    Mirrors the OpenAI chat-completion request plus FastChat extras
    (top_k, repetition_penalty).
    """

    model: str
    # Either a raw prompt string or a list of {"role": ..., "content": ...} dicts.
    messages: Union[str, List[Dict[str, str]]]
    temperature: Optional[float] = Field(default=0.7)
    top_p: Optional[float] = Field(default=1.0)
    top_k: Optional[int] = Field(default=-1)  # -1 conventionally disables top-k
    n: Optional[int] = Field(default=1)
    max_tokens: Optional[int] = Field(default=None)
    stop: Optional[Union[str, List[str]]] = Field(default=None)
    stream: Optional[bool] = Field(default=False)
    user: Optional[str] = Field(default=None)
    repetition_penalty: Optional[float] = Field(default=1.0)
    frequency_penalty: Optional[float] = Field(default=0.0)
    presence_penalty: Optional[float] = Field(default=0.0)
|
|
|
|
|
class ChatMessage(BaseModel):
    """A single chat turn: the speaker's role and the message text."""

    role: str
    content: str
|
|
|
|
|
class ChatCompletionResponseChoice(BaseModel):
    """One generated answer in a non-streaming chat completion response."""

    index: int
    message: ChatMessage
    # None while generation is unfinished; otherwise why generation stopped.
    finish_reason: Optional[Literal["stop", "length"]] = None
|
|
|
|
|
class ChatCompletionResponse(BaseModel):
    """Top-level payload for a non-streaming chat completion."""

    # Unique id, e.g. "chatcmpl-<random>"; generated fresh per instance.
    id: str = Field(default_factory=lambda: f"chatcmpl-{shortuuid.random()}")
    object: str = "chat.completion"
    # Unix timestamp (seconds) of response creation.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: UsageInfo
|
|
|
|
|
class DeltaMessage(BaseModel):
    """Incremental message fragment for streaming responses.

    A field is None when the current chunk does not update it.
    """

    role: Optional[str] = None
    content: Optional[str] = None
|
|
|
|
|
class ChatCompletionResponseStreamChoice(BaseModel):
    """One choice inside a streaming chat completion chunk."""

    index: int
    delta: DeltaMessage
    # None until the stream for this choice ends; then why it stopped.
    finish_reason: Optional[Literal["stop", "length"]] = None
|
|
|
|
|
class ChatCompletionStreamResponse(BaseModel):
    """One SSE chunk of a streaming chat completion (no usage field)."""

    # Unique id, e.g. "chatcmpl-<random>"; generated fresh per instance.
    id: str = Field(default_factory=lambda: f"chatcmpl-{shortuuid.random()}")
    object: str = "chat.completion.chunk"
    # Unix timestamp (seconds) of chunk creation.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseStreamChoice]
|
|
|
|
|
class APITokenCheckRequestItem(BaseModel):
    """One prompt to check against a model's context window."""

    model: str
    prompt: str
    max_tokens: int
|
|
|
|
|
class APITokenCheckRequest(BaseModel):
    """Batch request: token-check several prompts in one call."""

    prompts: List[APITokenCheckRequestItem]
|
|
|
|
|
class APITokenCheckResponseItem(BaseModel):
    """Token-check result for a single prompt.

    Field names are camelCase to match the wire format expected by clients;
    do not rename them.
    """

    fits: bool
    tokenCount: int
    contextLength: int
|
|
|
|
|
class APITokenCheckResponse(BaseModel):
    """Batch response: one result per requested prompt, in order."""

    prompts: List[APITokenCheckResponseItem]
|
|
|
|
|
class CompletionRequest(BaseModel):
    """Request body for the text-completion endpoint.

    Mirrors the OpenAI completions request plus the FastChat top_k extra.
    """

    model: str
    # A single prompt string, or a list (e.g. multiple prompts / token ids).
    prompt: Union[str, List[Any]]
    suffix: Optional[str] = Field(default=None)
    temperature: Optional[float] = Field(default=0.7)
    n: Optional[int] = Field(default=1)
    max_tokens: Optional[int] = Field(default=16)
    stop: Optional[Union[str, List[str]]] = Field(default=None)
    stream: Optional[bool] = Field(default=False)
    top_p: Optional[float] = Field(default=1.0)
    top_k: Optional[int] = Field(default=-1)  # -1 conventionally disables top-k
    logprobs: Optional[int] = Field(default=None)
    echo: Optional[bool] = Field(default=False)
    presence_penalty: Optional[float] = Field(default=0.0)
    frequency_penalty: Optional[float] = Field(default=0.0)
    user: Optional[str] = Field(default=None)
|
|
|
|
|
class CompletionResponseChoice(BaseModel):
    """One generated text in a non-streaming completion response."""

    index: int
    text: str
    logprobs: Optional[int] = None
    # None while generation is unfinished; otherwise why it stopped.
    finish_reason: Optional[Literal["stop", "length"]] = None
|
|
|
|
|
class CompletionResponse(BaseModel):
    """Top-level payload for a non-streaming text completion."""

    # Unique id, e.g. "cmpl-<random>"; generated fresh per instance.
    id: str = Field(default_factory=lambda: f"cmpl-{shortuuid.random()}")
    object: str = "text_completion"
    # Unix timestamp (seconds) of response creation.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseChoice]
    usage: UsageInfo
|
|
|
|
|
class CompletionResponseStreamChoice(BaseModel):
    """One choice inside a streaming text-completion chunk."""

    index: int
    text: str
    # NOTE(review): annotated Optional[float] here but Optional[int] in
    # CompletionResponseChoice — looks unintentional; confirm intended type
    # with callers before unifying, since narrowing could reject inputs.
    logprobs: Optional[float] = None
    # None until the stream for this choice ends; then why it stopped.
    finish_reason: Optional[Literal["stop", "length"]] = None
|
|
|
|
|
class CompletionStreamResponse(BaseModel):
    """One SSE chunk of a streaming text completion (no usage field)."""

    # Unique id, e.g. "cmpl-<random>"; generated fresh per instance.
    id: str = Field(default_factory=lambda: f"cmpl-{shortuuid.random()}")
    object: str = "text_completion"
    # Unix timestamp (seconds) of chunk creation.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseStreamChoice]
|
|