| import os |
| import uuid |
| import faiss |
| import shutil |
| import logging |
| import pandas as pd |
| from typing import Any |
| from langchain_core import documents |
| from langchain_community import embeddings |
| from langchain_community import vectorstores |
| from langchain_community.docstore import in_memory |
|
|
|
|
# Throwaway text that build_faiss embeds once, only to measure the length of
# the resulting vector and size the FAISS index; its content is never indexed.
DEFAULT_INDEX_QUERY = "hello world"
|
|
|
|
def build_faiss(
    data_frame: pd.DataFrame,
    index_path: str,
    embedder: Any
) -> vectorstores.FAISS:
    """Build a FAISS index from a DataFrame and save it to disk.

    Every cell becomes one document: its text is ``str(cell)`` and its
    metadata records the originating row label (``"row"``) and column name
    (``"column"``). The documents are embedded with ``embedder`` into an
    inner-product index (``faiss.IndexFlatIP``), which is then saved under
    ``index_path``.

    Anything already stored at ``index_path`` is removed only after the new
    index has been built in memory, so a failure while embedding leaves the
    previous index on disk untouched.

    Args:
        data_frame: DataFrame containing data to index. When it yields no
            cells, no documents are added and an empty index is saved.
        index_path: Directory where to save the FAISS index. An existing
            directory at this path is deleted (best effort) before saving.
        embedder: Embedder object to generate vectors. Its ``embed_query``
            method is called once to determine the vector dimension.

    Returns:
        vectorstores.FAISS: Built FAISS vectorstore object
    """
    # One document per (row, column) cell, in row-major order.
    embedded_documents = [
        documents.Document(
            page_content=str(cell_val),
            metadata={"row": row_idx, "column": col_name},
        )
        for row_idx, row in data_frame.iterrows()
        for col_name, cell_val in row.items()
    ]

    # The flat index must know its dimensionality up front; embed a probe
    # string once and use the length of the returned vector.
    vector_dim = len(embedder.embed_query(DEFAULT_INDEX_QUERY))
    vectorstore = vectorstores.FAISS(
        embedding_function=embedder,
        index=faiss.IndexFlatIP(vector_dim),
        docstore=in_memory.InMemoryDocstore(),
        index_to_docstore_id={},
    )

    # Skip the embedding call entirely when there is nothing to add, so an
    # empty DataFrame produces an empty index rather than an empty batch.
    if embedded_documents:
        uuids = [str(uuid.uuid4()) for _ in embedded_documents]
        vectorstore.add_documents(documents=embedded_documents, ids=uuids)
    logging.debug("Added %d documents to FAISS index", len(embedded_documents))

    # Replace the on-disk index only now that the new one exists in memory.
    # Removal stays best-effort (ignore_errors=True), as before.
    if os.path.exists(index_path):
        shutil.rmtree(index_path, ignore_errors=True)
        logging.debug("Deleted existing FAISS index at %s", index_path)

    os.makedirs(index_path, exist_ok=True)
    vectorstore.save_local(index_path)
    logging.debug("FAISS index saved to ./%s/", index_path)
    return vectorstore
|
|
|
|
def load_faiss_index(
    index_path: str,
    hf_model_name: str
) -> vectorstores.FAISS:
    """Restore a FAISS vectorstore that was previously saved to disk.

    A HuggingFace embedder is created for ``hf_model_name`` and handed to
    the loader together with the saved index.

    Args:
        index_path: Path of the saved FAISS index
        hf_model_name: Name of the HuggingFace model for embeddings

    Returns:
        vectorstores.FAISS: Loaded FAISS vectorstore object
    """
    hf_embedder = embeddings.HuggingFaceEmbeddings(model_name=hf_model_name)

    # NOTE(review): this explicitly opts in to the loader's "dangerous
    # deserialization" path; only point index_path at index directories
    # this application wrote itself, never at untrusted input.
    restored_store = vectorstores.FAISS.load_local(
        index_path,
        hf_embedder,
        allow_dangerous_deserialization=True,
    )
    return restored_store
|
|