# api/core/llama_cpp_engine.py
from typing import (
Optional,
List,
Union,
Dict,
Iterator,
Any,
)
from llama_cpp import Llama
from openai.types.chat import (
ChatCompletionMessage,
ChatCompletion,
ChatCompletionChunk,
)
from openai.types.chat import ChatCompletionMessageParam
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage
from api.adapter import get_prompt_adapter
from api.utils.compat import model_parse


class LlamaCppEngine:
def __init__(
self,
model: Llama,
model_name: str,
prompt_name: Optional[str] = None,
):
"""
Initializes a LlamaCppEngine instance.
Args:
model (Llama): The Llama model to be used by the engine.
model_name (str): The name of the model.
prompt_name (Optional[str], optional): The name of the prompt. Defaults to None.
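
        Example (illustrative sketch; the GGUF path and model name are placeholders):
            llm = Llama(model_path="/path/to/model.gguf")  # hypothetical path
            engine = LlamaCppEngine(llm, model_name="qwen")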
"""
self.model = model
self.model_name = model_name.lower()
self.prompt_name = prompt_name.lower() if prompt_name is not None else None
self.prompt_adapter = get_prompt_adapter(self.model_name, prompt_name=self.prompt_name)

    def apply_chat_template(
self,
messages: List[ChatCompletionMessageParam],
functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
tools: Optional[List[Dict[str, Any]]] = None,
) -> str:
"""
Applies a chat template to the given list of messages.
Args:
messages (List[ChatCompletionMessageParam]): The list of chat completion messages.
            functions (Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], optional): Function definitions to expose to the model for function calling. Defaults to None.
            tools (Optional[List[Dict[str, Any]]], optional): Tool definitions used when postprocessing the messages. Defaults to None.
        Returns:
            str: The prompt string produced by applying the chat template to the messages.
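
        Example (illustrative; the message content is made up):
            messages = [{"role": "user", "content": "Hello!"}]
            prompt = engine.apply_chat_template(messages)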
"""
if self.prompt_adapter.function_call_available:
messages = self.prompt_adapter.postprocess_messages(messages, functions, tools)
return self.prompt_adapter.apply_chat_template(messages)

    def create_completion(self, prompt, **kwargs) -> Union[Iterator, Dict[str, Any]]:
"""
Creates a completion using the specified prompt and additional keyword arguments.
Args:
prompt (str): The prompt for the completion.
**kwargs: Additional keyword arguments to be passed to the model's create_completion method.
Returns:
            Union[Iterator, Dict[str, Any]]: The completion dictionary, or an iterator of completion chunks when ``stream=True`` is passed.
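
        Example (illustrative; the keyword arguments shown are standard ``llama_cpp`` sampling options):
            out = engine.create_completion(prompt, max_tokens=128, temperature=0.7)
            text = out["choices"][0]["text"]  # generated text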
"""
return self.model.create_completion(prompt, **kwargs)

    def _create_chat_completion(self, prompt, **kwargs) -> ChatCompletion:
"""
Creates a chat completion using the specified prompt and additional keyword arguments.
Args:
prompt (str): The prompt for the chat completion.
**kwargs: Additional keyword arguments to be passed to the create_completion method.
Returns:
ChatCompletion: The chat completion generated by the model.
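
        Example (illustrative; assumes a non-streaming completion):
            chat = engine._create_chat_completion(prompt, max_tokens=128)
            content = chat.choices[0].message.content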
"""
completion = self.create_completion(prompt, **kwargs)
message = ChatCompletionMessage(
role="assistant",
content=completion["choices"][0]["text"].strip(),
)
choice = Choice(
index=0,
message=message,
finish_reason="stop",
logprobs=None,
)
usage = model_parse(CompletionUsage, completion["usage"])
return ChatCompletion(
id="chat" + completion["id"],
choices=[choice],
created=completion["created"],
model=completion["model"],
object="chat.completion",
usage=usage,
)

    def _create_chat_completion_stream(self, prompt, **kwargs) -> Iterator:
"""
Generates a stream of chat completion chunks based on the given prompt.
Args:
prompt (str): The prompt for generating chat completion chunks.
**kwargs: Additional keyword arguments for creating completions.
Yields:
ChatCompletionChunk: A chunk of chat completion generated from the prompt.
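
        Example (illustrative; ``stream=True`` is forwarded to the underlying model):
            for chunk in engine._create_chat_completion_stream(prompt, stream=True):
                delta = chunk.choices[0].delta
                if delta.content:
                    print(delta.content, end="")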
"""
completion = self.create_completion(prompt, **kwargs)
for i, output in enumerate(completion):
_id, _created, _model = output["id"], output["created"], output["model"]
if i == 0:
choice = ChunkChoice(
index=0,
delta=ChoiceDelta(role="assistant", content=""),
finish_reason=None,
logprobs=None,
)
yield ChatCompletionChunk(
id=f"chat{_id}",
choices=[choice],
created=_created,
model=_model,
object="chat.completion.chunk",
)
if output["choices"][0]["finish_reason"] is None:
delta = ChoiceDelta(content=output["choices"][0]["text"])
else:
delta = ChoiceDelta()
choice = ChunkChoice(
index=0,
delta=delta,
finish_reason=output["choices"][0]["finish_reason"],
logprobs=None,
)
yield ChatCompletionChunk(
id=f"chat{_id}",
choices=[choice],
created=_created,
model=_model,
object="chat.completion.chunk",
)

    def create_chat_completion(self, prompt, **kwargs) -> Union[Iterator, ChatCompletion]:
        """
        Creates a chat completion for the given prompt, dispatching on the
        ``stream`` keyword argument.
        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments forwarded to the underlying
                completion call; ``stream=True`` selects streaming mode.
        Returns:
            Union[Iterator, ChatCompletion]: An iterator of chat completion chunks
            when streaming, otherwise a single chat completion.
        """
return (
self._create_chat_completion_stream(prompt, **kwargs)
if kwargs.get("stream", False)
else self._create_chat_completion(prompt, **kwargs)
)

    @property
def stop(self):
"""
Gets the stop property of the prompt adapter.
Returns:
The stop property of the prompt adapter, or None if it does not exist.
"""
return self.prompt_adapter.stop if hasattr(self.prompt_adapter, "stop") else None
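

if __name__ == "__main__":
    # Usage sketch (illustrative only): the GGUF path and model name are
    # placeholders for a locally available model, not part of this module.
    llm = Llama(model_path="/path/to/model.gguf")  # hypothetical path
    engine = LlamaCppEngine(llm, model_name="qwen")

    messages = [{"role": "user", "content": "Hello!"}]
    prompt = engine.apply_chat_template(messages)

    # Non-streaming call: returns a single ChatCompletion.
    chat = engine.create_chat_completion(prompt, max_tokens=128)
    print(chat.choices[0].message.content)

    # Streaming call: yields ChatCompletionChunk objects.
    for chunk in engine.create_chat_completion(prompt, stream=True, max_tokens=128):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="")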