ChatData / backend /vector_store /myscale_without_metadata.py
lqhl's picture
Synced repo using 'sync_with_huggingface' Github Action
e931b70 verified
raw
history blame
1.97 kB
from typing import Any, Optional, List
from langchain.docstore.document import Document
from langchain.embeddings.base import Embeddings
from langchain.vectorstores.myscale import MyScale, MyScaleSettings
from logger import logger
class MyScaleWithoutMetadataJson(MyScale):
    """MyScale vector store that returns explicitly named columns as metadata.

    Each search result's ``Document.metadata`` is built from the columns
    listed in ``must_have_cols``, which are selected alongside the text
    column and the computed distance.

    NOTE(review): the class name suggests the parent ``MyScale`` store reads
    metadata from a single JSON column instead -- confirm against the
    langchain implementation.
    """

    def __init__(
        self,
        embedding: Embeddings,
        config: Optional[MyScaleSettings] = None,
        must_have_cols: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Initialise the store and remember which columns to return.

        Args:
            embedding: Embedding model forwarded to ``MyScale.__init__``.
            config: Optional MyScale settings forwarded to
                ``MyScale.__init__``.
            must_have_cols: Names of the columns to select on every query and
                expose as document metadata. ``None`` (the default) means no
                extra columns.
            **kwargs: Extra keyword arguments forwarded to
                ``MyScale.__init__``.
        """
        try:
            super().__init__(embedding, config, **kwargs)
        except Exception as e:
            # Best-effort: a failure in the parent initialiser is logged and
            # construction continues. NOTE(review): depending on where the
            # parent failed, attributes such as ``self.client`` may be
            # missing afterwards -- confirm this is intended.
            logger.error(e)
        # Copy into a fresh list so instances never share (or mutate) a
        # common default list or the caller's own list object.
        self.must_have_cols: List[str] = (
            [] if must_have_cols is None else list(must_have_cols)
        )

    def _build_qstr(
        self, q_emb: List[float], topk: int, where_str: Optional[str] = None
    ) -> str:
        """Build the SQL text for a vector similarity query.

        Args:
            q_emb: Query embedding; rendered as a comma-separated array
                literal.
            topk: Value used for the ``LIMIT`` clause.
            where_str: Optional raw SQL condition. When truthy it is inserted
                verbatim after ``PREWHERE``; no escaping is applied here.

        Returns:
            The query string selecting the text column, ``dist`` and every
            column in ``self.must_have_cols``, ordered by ``dist`` in
            ``self.dist_order``.
        """
        q_emb_str = ",".join(map(str, q_emb))
        prewhere_clause = f"PREWHERE {where_str}" if where_str else ""
        # Join the complete column list in one go so that an empty
        # ``must_have_cols`` does not leave a dangling comma after ``dist``.
        select_cols = ", ".join(
            [self.config.column_map["text"], "dist", *self.must_have_cols]
        )
        return f"""
            SELECT {select_cols}
            FROM {self.config.database}.{self.config.table}
            {prewhere_clause}
            ORDER BY distance({self.config.column_map['vector']}, [{q_emb_str}])
                AS dist {self.dist_order}
            LIMIT {topk}
            """

    def similarity_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        where_str: Optional[str] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return documents for the rows closest to ``embedding``.

        Args:
            embedding: Query vector.
            k: Maximum number of rows to request.
            where_str: Optional raw SQL condition passed to ``_build_qstr``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One ``Document`` per result row, with the text column as
            ``page_content`` and the ``must_have_cols`` values as
            ``metadata``. If running the query or building the documents
            raises, the error is logged and an empty list is returned.
        """
        q_str = self._build_qstr(embedding, k, where_str)
        text_col = self.config.column_map["text"]
        try:
            return [
                Document(
                    page_content=row[text_col],
                    metadata={col: row[col] for col in self.must_have_cols},
                )
                for row in self.client.query(q_str).named_results()
            ]
        except Exception as e:
            # Best-effort search: report the failure (ANSI-coloured) and
            # fall back to "no results" instead of propagating.
            logger.error(
                f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m")
            return []