from typing import Any, Dict, List

from langchain_core._api import deprecated
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import BaseMessage, get_buffer_string

from langchain.memory.chat_memory import BaseChatMemory


@deprecated(
    since="0.3.1",
    removal="1.0.0",
    message=(
        "Please see the migration guide at: "
        "https://python.langchain.com/docs/versions/migrating_memory/"
    ),
)
class ConversationTokenBufferMemory(BaseChatMemory):
"""Conversation chat memory with token limit. |
|
|
|
Keeps only the most recent messages in the conversation under the constraint |
|
that the total number of tokens in the conversation does not exceed a certain limit. |
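
    Example (a minimal usage sketch; ``ChatOpenAI`` below is just one possible
    ``BaseLanguageModel`` and is not required by this class -- it assumes the
    ``langchain_openai`` package is installed and an OpenAI API key is configured):

        .. code-block:: python

            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI()
            memory = ConversationTokenBufferMemory(llm=llm, max_token_limit=100)
            memory.save_context({"input": "hi"}, {"output": "hello there"})
            memory.load_memory_variables({})
            # -> a dict with the pruned conversation under the "history" key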
    """

    human_prefix: str = "Human"
    ai_prefix: str = "AI"
    llm: BaseLanguageModel
    memory_key: str = "history"
    max_token_limit: int = 2000

    @property
    def buffer(self) -> Any:
        """String buffer of memory."""
        return self.buffer_as_messages if self.return_messages else self.buffer_as_str

    @property
    def buffer_as_str(self) -> str:
        """Exposes the buffer as a string when ``return_messages`` is False."""
        return get_buffer_string(
            self.chat_memory.messages,
            human_prefix=self.human_prefix,
            ai_prefix=self.ai_prefix,
        )

    @property
    def buffer_as_messages(self) -> List[BaseMessage]:
        """Exposes the buffer as a list of messages when ``return_messages`` is True."""
        return self.chat_memory.messages

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables.

        :meta private:
        """
        return [self.memory_key]

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return history buffer."""
        return {self.memory_key: self.buffer}

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Save context from this conversation to buffer. Pruned."""
        super().save_context(inputs, outputs)
        # Prune the buffer if it exceeds the max token limit, dropping the oldest
        # messages first until the remaining conversation fits.
        buffer = self.chat_memory.messages
        curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)
        if curr_buffer_length > self.max_token_limit:
            # Popped messages are collected here but not otherwise used by this class.
            pruned_memory = []
            while curr_buffer_length > self.max_token_limit:
                pruned_memory.append(buffer.pop(0))
                curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)