import json
from typing import Any, Iterator, List, Optional

from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    ChatMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult

from langchain.callbacks.manager import (
    CallbackManagerForLLMRun,
)
from langchain.chat_models.base import BaseChatModel
from langchain.llms.ollama import _OllamaCommon

def _stream_response_to_chat_generation_chunk(
    stream_response: str,
) -> ChatGenerationChunk:
    """Convert a stream response to a generation chunk."""
    parsed_response = json.loads(stream_response)
    # Only the final streamed line (marked done=True) carries generation metadata.
    generation_info = parsed_response if parsed_response.get("done") is True else None
    return ChatGenerationChunk(
        message=AIMessageChunk(content=parsed_response.get("response", "")),
        generation_info=generation_info,
    )
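
# Illustrative only: a rough sketch of the JSON lines this helper expects,
# assuming the usual Ollama /api/generate streaming shape (exact fields may
# vary by Ollama version):
#
#   '{"response": "Hel", "done": false}'  -> chunk with content "Hel", no metadata
#   '{"response": "", "done": true}'      -> empty chunk whose generation_info is
#                                            the full parsed payload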

class ChatOllama(BaseChatModel, _OllamaCommon):
    """Ollama locally runs large language models.

    To use, follow the instructions at https://ollama.ai/.

    Example:
        .. code-block:: python

            from langchain.chat_models import ChatOllama
            ollama = ChatOllama(model="llama2")
    """

    @property
    def _llm_type(self) -> str:
        """Return type of chat model."""
        return "ollama-chat"

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return whether this model can be serialized by LangChain."""
        return True

    def _format_message_as_text(self, message: BaseMessage) -> str:
        # Render a single message with Llama-2-style tags: [INST] ... [/INST]
        # for human turns and <<SYS>> ... <</SYS>> for system prompts.
        if isinstance(message, ChatMessage):
            message_text = f"\n\n{message.role.capitalize()}: {message.content}"
        elif isinstance(message, HumanMessage):
            message_text = f"[INST] {message.content} [/INST]"
        elif isinstance(message, AIMessage):
            message_text = f"{message.content}"
        elif isinstance(message, SystemMessage):
            message_text = f"<<SYS>> {message.content} <</SYS>>"
        else:
            raise ValueError(f"Got unknown type {message}")
        return message_text

    def _format_messages_as_text(self, messages: List[BaseMessage]) -> str:
        return "\n".join(
            [self._format_message_as_text(message) for message in messages]
        )
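
    # Illustrative only: for a [SystemMessage, HumanMessage] pair, the prompt
    # built above looks roughly like
    #
    #   <<SYS>> You are a helpful assistant. <</SYS>>
    #   [INST] Tell me about the history of AI [/INST]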

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Call out to Ollama's generate endpoint.

        Args:
            messages: The list of base messages to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            Chat generations from the model.

        Example:
            .. code-block:: python

                response = ollama([
                    HumanMessage(content="Tell me about the history of AI")
                ])
        """
        prompt = self._format_messages_as_text(messages)
        final_chunk = super()._stream_with_aggregation(
            prompt, stop=stop, run_manager=run_manager, verbose=self.verbose, **kwargs
        )
        chat_generation = ChatGeneration(
            message=AIMessage(content=final_chunk.text),
            generation_info=final_chunk.generation_info,
        )
        return ChatResult(generations=[chat_generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream chat generation chunks from Ollama's generate endpoint."""
        prompt = self._format_messages_as_text(messages)
        for stream_resp in self._create_stream(prompt, stop, **kwargs):
            if stream_resp:
                chunk = _stream_response_to_chat_generation_chunk(stream_resp)
                yield chunk
                if run_manager:
                    # Surface each new token to registered callbacks as it arrives.
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
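
# A minimal usage sketch, not part of the module: it assumes an Ollama server is
# reachable locally with the "llama2" model pulled, and a LangChain version in
# which BaseChatModel exposes the legacy __call__ and Runnable .stream() interfaces.
if __name__ == "__main__":
    chat = ChatOllama(model="llama2")

    # Blocking call (goes through _generate and aggregates the stream).
    reply = chat([HumanMessage(content="Tell me about the history of AI")])
    print(reply.content)

    # Token-by-token streaming (goes through _stream).
    for message_chunk in chat.stream([HumanMessage(content="Say hello in one word.")]):
        print(message_chunk.content, end="", flush=True)
    print()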