# api/core/llama_cpp_engine.py
from typing import (
Optional,
List,
Union,
Dict,
Iterator,
Any,
)
from llama_cpp import Llama
from openai.types.chat import (
ChatCompletionMessage,
ChatCompletion,
ChatCompletionChunk,
)
from openai.types.chat import ChatCompletionMessageParam
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage
from api.adapter import get_prompt_adapter
from api.utils.compat import model_parse


class LlamaCppEngine:
def __init__(
self,
model: Llama,
model_name: str,
prompt_name: Optional[str] = None,
):
"""
Initializes a LlamaCppEngine instance.
Args:
model (Llama): The Llama model to be used by the engine.
model_name (str): The name of the model.
prompt_name (Optional[str], optional): The name of the prompt. Defaults to None.
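
        Example (illustrative sketch; the GGUF path and model name are placeholders):
            llm = Llama(model_path="/path/to/model.gguf")  # hypothetical path
            engine = LlamaCppEngine(llm, model_name="qwen")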
"""
self.model = model
self.model_name = model_name.lower()
self.prompt_name = prompt_name.lower() if prompt_name is not None else None
self.prompt_adapter = get_prompt_adapter(self.model_name, prompt_name=self.prompt_name)

    def apply_chat_template(
self,
messages: List[ChatCompletionMessageParam],
functions: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
tools: Optional[List[Dict[str, Any]]] = None,
) -> str:
"""
Applies a chat template to the given list of messages.
Args:
messages (List[ChatCompletionMessageParam]): The list of chat completion messages.
            functions (Optional[Union[Dict[str, Any], List[Dict[str, Any]]]], optional): Function definitions to expose to the model for function calling. Defaults to None.
            tools (Optional[List[Dict[str, Any]]], optional): Tool definitions used when postprocessing the messages. Defaults to None.
        Returns:
            str: The prompt string produced by applying the chat template to the messages.
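
        Example (illustrative; the message content is made up):
            messages = [{"role": "user", "content": "Hello!"}]
            prompt = engine.apply_chat_template(messages)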
"""
if self.prompt_adapter.function_call_available:
messages = self.prompt_adapter.postprocess_messages(messages, functions, tools)
return self.prompt_adapter.apply_chat_template(messages)

    def create_completion(self, prompt, **kwargs) -> Union[Iterator, Dict[str, Any]]:
"""
Creates a completion using the specified prompt and additional keyword arguments.
Args:
prompt (str): The prompt for the completion.
**kwargs: Additional keyword arguments to be passed to the model's create_completion method.
Returns:
            Union[Iterator, Dict[str, Any]]: The completion dictionary, or an iterator of completion chunks when ``stream=True`` is passed.
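
        Example (illustrative; the keyword arguments shown are standard ``llama_cpp`` sampling options):
            out = engine.create_completion(prompt, max_tokens=128, temperature=0.7)
            text = out["choices"][0]["text"]  # generated text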
"""
return self.model.create_completion(prompt, **kwargs)

    def _create_chat_completion(self, prompt, **kwargs) -> ChatCompletion:
"""
Creates a chat completion using the specified prompt and additional keyword arguments.
Args:
prompt (str): The prompt for the chat completion.
**kwargs: Additional keyword arguments to be passed to the create_completion method.
Returns:
ChatCompletion: The chat completion generated by the model.
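
        Example (illustrative; assumes a non-streaming completion):
            chat = engine._create_chat_completion(prompt, max_tokens=128)
            content = chat.choices[0].message.content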
"""
completion = self.create_completion(prompt, **kwargs)
message = ChatCompletionMessage(
role="assistant",
content=completion["choices"][0]["text"].strip(),
)
choice = Choice(
index=0,
message=message,
finish_reason="stop",
logprobs=None,
)
usage = model_parse(CompletionUsage, completion["usage"])
return ChatCompletion(
id="chat" + completion["id"],
choices=[choice],
created=completion["created"],
model=completion["model"],
object="chat.completion",
usage=usage,
)

    def _create_chat_completion_stream(self, prompt, **kwargs) -> Iterator:
"""
Generates a stream of chat completion chunks based on the given prompt.
Args:
prompt (str): The prompt for generating chat completion chunks.
**kwargs: Additional keyword arguments for creating completions.
Yields:
ChatCompletionChunk: A chunk of chat completion generated from the prompt.
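
        Example (illustrative; ``stream=True`` is forwarded to the underlying model):
            for chunk in engine._create_chat_completion_stream(prompt, stream=True):
                delta = chunk.choices[0].delta
                if delta.content:
                    print(delta.content, end="")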
"""
completion = self.create_completion(prompt, **kwargs)
for i, output in enumerate(completion):
_id, _created, _model = output["id"], output["created"], output["model"]
if i == 0:
choice = ChunkChoice(
index=0,
delta=ChoiceDelta(role="assistant", content=""),
finish_reason=None,
logprobs=None,
)
yield ChatCompletionChunk(
id=f"chat{_id}",
choices=[choice],
created=_created,
model=_model,
object="chat.completion.chunk",
)
if output["choices"][0]["finish_reason"] is None:
delta = ChoiceDelta(content=output["choices"][0]["text"])
else:
delta = ChoiceDelta()
choice = ChunkChoice(
index=0,
delta=delta,
finish_reason=output["choices"][0]["finish_reason"],
logprobs=None,
)
yield ChatCompletionChunk(
id=f"chat{_id}",
choices=[choice],
created=_created,
model=_model,
object="chat.completion.chunk",
)

    def create_chat_completion(self, prompt, **kwargs) -> Union[Iterator, ChatCompletion]:
        """
        Creates a chat completion for the given prompt, dispatching on the
        ``stream`` keyword argument.
        Args:
            prompt (str): The prompt for the chat completion.
            **kwargs: Additional keyword arguments forwarded to the underlying
                completion call; ``stream=True`` selects streaming mode.
        Returns:
            Union[Iterator, ChatCompletion]: An iterator of chat completion chunks
            when streaming, otherwise a single chat completion.
        """
return (
self._create_chat_completion_stream(prompt, **kwargs)
if kwargs.get("stream", False)
else self._create_chat_completion(prompt, **kwargs)
)

    @property
def stop(self):
"""
Gets the stop property of the prompt adapter.
Returns:
The stop property of the prompt adapter, or None if it does not exist.
"""
return self.prompt_adapter.stop if hasattr(self.prompt_adapter, "stop") else None
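

if __name__ == "__main__":
    # Usage sketch (illustrative only): the GGUF path and model name are
    # placeholders for a locally available model, not part of this module.
    llm = Llama(model_path="/path/to/model.gguf")  # hypothetical path
    engine = LlamaCppEngine(llm, model_name="qwen")

    messages = [{"role": "user", "content": "Hello!"}]
    prompt = engine.apply_chat_template(messages)

    # Non-streaming call: returns a single ChatCompletion.
    chat = engine.create_chat_completion(prompt, max_tokens=128)
    print(chat.choices[0].message.content)

    # Streaming call: yields ChatCompletionChunk objects.
    for chunk in engine.create_chat_completion(prompt, stream=True, max_tokens=128):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="")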