import json
from typing import Any, Iterator, List, Optional

from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    ChatMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult

from langchain.callbacks.manager import (
    CallbackManagerForLLMRun,
)
from langchain.chat_models.base import BaseChatModel
from langchain.llms.ollama import _OllamaCommon

def _stream_response_to_chat_generation_chunk(
    stream_response: str,
) -> ChatGenerationChunk:
    """Convert a stream response to a generation chunk."""
    parsed_response = json.loads(stream_response)
    # Only the final streamed line (marked done=True) carries generation metadata.
    generation_info = parsed_response if parsed_response.get("done") is True else None
    return ChatGenerationChunk(
        message=AIMessageChunk(content=parsed_response.get("response", "")),
        generation_info=generation_info,
    )
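
# Illustrative only: a rough sketch of the JSON lines this helper expects,
# assuming the usual Ollama /api/generate streaming shape (exact fields may
# vary by Ollama version):
#
#   '{"response": "Hel", "done": false}'  -> chunk with content "Hel", no metadata
#   '{"response": "", "done": true}'      -> empty chunk whose generation_info is
#                                            the full parsed payload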

class ChatOllama(BaseChatModel, _OllamaCommon):
    """Ollama locally runs large language models.

    To use, follow the instructions at https://ollama.ai/.

    Example:
        .. code-block:: python

            from langchain.chat_models import ChatOllama
            ollama = ChatOllama(model="llama2")
    """

    @property
    def _llm_type(self) -> str:
        """Return type of chat model."""
        return "ollama-chat"

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return whether this model can be serialized by LangChain."""
        return True

    def _format_message_as_text(self, message: BaseMessage) -> str:
        # Render a single message with Llama-2-style tags: [INST] ... [/INST]
        # for human turns and <<SYS>> ... <</SYS>> for system prompts.
        if isinstance(message, ChatMessage):
            message_text = f"\n\n{message.role.capitalize()}: {message.content}"
        elif isinstance(message, HumanMessage):
            message_text = f"[INST] {message.content} [/INST]"
        elif isinstance(message, AIMessage):
            message_text = f"{message.content}"
        elif isinstance(message, SystemMessage):
            message_text = f"<<SYS>> {message.content} <</SYS>>"
        else:
            raise ValueError(f"Got unknown type {message}")
        return message_text

    def _format_messages_as_text(self, messages: List[BaseMessage]) -> str:
        return "\n".join(
            [self._format_message_as_text(message) for message in messages]
        )
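
    # Illustrative only: for a [SystemMessage, HumanMessage] pair, the prompt
    # built above looks roughly like
    #
    #   <<SYS>> You are a helpful assistant. <</SYS>>
    #   [INST] Tell me about the history of AI [/INST]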

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Call out to Ollama's generate endpoint.

        Args:
            messages: The list of base messages to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            Chat generations from the model.

        Example:
            .. code-block:: python

                response = ollama([
                    HumanMessage(content="Tell me about the history of AI")
                ])
        """
        prompt = self._format_messages_as_text(messages)
        final_chunk = super()._stream_with_aggregation(
            prompt, stop=stop, run_manager=run_manager, verbose=self.verbose, **kwargs
        )
        chat_generation = ChatGeneration(
            message=AIMessage(content=final_chunk.text),
            generation_info=final_chunk.generation_info,
        )
        return ChatResult(generations=[chat_generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream chat generation chunks from Ollama's generate endpoint."""
        prompt = self._format_messages_as_text(messages)
        for stream_resp in self._create_stream(prompt, stop, **kwargs):
            if stream_resp:
                chunk = _stream_response_to_chat_generation_chunk(stream_resp)
                yield chunk
                if run_manager:
                    # Surface each new token to registered callbacks as it arrives.
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
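
# A minimal usage sketch, not part of the module: it assumes an Ollama server is
# reachable locally with the "llama2" model pulled, and a LangChain version in
# which BaseChatModel exposes the legacy __call__ and Runnable .stream() interfaces.
if __name__ == "__main__":
    chat = ChatOllama(model="llama2")

    # Blocking call (goes through _generate and aggregates the stream).
    reply = chat([HumanMessage(content="Tell me about the history of AI")])
    print(reply.content)

    # Token-by-token streaming (goes through _stream).
    for message_chunk in chat.stream([HumanMessage(content="Say hello in one word.")]):
        print(message_chunk.content, end="", flush=True)
    print()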