import logging
from typing import Any, Optional

from langchain_community.vectorstores.chroma import Chroma

from src.logging import logging_info

from .BaseDB import BaseDB


# TODO: persist the database and load it when a user enters.
class ChromaDB(BaseDB):
    """Story store built on LangChain's ``Chroma`` vector store wrapper.

    Relies on ``self.embedding``, ``self.persist_dir`` and
    ``self.text_splitter``, which are not defined in this class and are
    presumably provided by ``BaseDB`` -- confirm against ``BaseDB``.
    """

    def __init__(
        self, embedding_name: Optional[str] = None, persist_dir=None
    ) -> None:
        """Forward ``embedding_name`` and ``persist_dir`` to ``BaseDB``."""
        super().__init__(embedding_name, persist_dir)

    def init_db(self):
        """Create the Chroma client and store it on ``self.client``.

        NOTE(review): nothing in this class calls ``init_db``; presumably
        ``BaseDB.__init__`` does -- confirm.
        """
        self.client = Chroma(
            persist_directory=self.persist_dir,
            embedding_function=self.embedding,
        )

    def addStories(self, stories: str, metas: Optional[dict] = None):
        """Split ``stories`` into chunks and add them to the store.

        Args:
            stories: Raw text to split with ``self.text_splitter``.
            metas: Metadata attached to every chunk produced from
                ``stories``.
        """
        # Split once and reuse the result for both logging and insertion.
        chunks = self.text_splitter(stories)
        if not chunks:
            # Nothing to store; also avoids indexing ``[-1]`` on an empty
            # result, which would raise IndexError.
            return

        logging_info(chunks[-1])
        self.client.add_texts(
            texts=chunks,
            metadatas=[metas] * len(chunks),
        )

    def searchBySim(
        self,
        query,
        n_results=5,
        metas: Optional[dict] = None,
        only_return_document=True,
    ):
        """Run a similarity search for ``query``.

        Args:
            query: Text to search for.
            n_results: Passed to the client as ``k``.
            metas: Passed to the client as ``filter``.
            only_return_document: When true, return only the
                ``page_content`` of each hit; otherwise return the hits
                exactly as the client produced them.

        Returns:
            A list of ``page_content`` values, or the raw client result.
        """
        hits = self.client.similarity_search(
            query, k=n_results, filter=metas
        )
        if only_return_document:
            return [hit.page_content for hit in hits]
        return hits

    def deleteStoriesByMeta(self, metas):
        """Delete every entry whose metadata matches ``metas``.

        Looks up the matching ids via ``searchByMeta`` and deletes them;
        does nothing when no ids match.
        """
        ids = self.searchByMeta(metas=metas)["ids"]
        if ids:
            self.client.delete(ids)

    def searchByMeta(
        self, metas=None, include: Optional[list[str]] = None
    ) -> dict[str, Any]:
        """Fetch entries by metadata.

        Args:
            metas: Passed to the client's ``get`` as ``where``.
            include: Passed to the client's ``get`` as ``include``.

        Returns:
            The client's ``get`` result; callers in this class read its
            ``"ids"`` key.
        """
        return self.client.get(where=metas, include=include)