Spaces:
Runtime error
Runtime error
"""Faiss Vector store index. | |
An index that that is built on top of an existing vector store. | |
""" | |
from typing import Any, List, Optional, cast | |
import numpy as np | |
from gpt_index.vector_stores.types import ( | |
NodeEmbeddingResult, | |
VectorStore, | |
VectorStoreQueryResult, | |
) | |
class FaissVectorStore(VectorStore): | |
"""Faiss Vector Store. | |
Embeddings are stored within a Faiss index. | |
During query time, the index uses Faiss to query for the top | |
k embeddings, and returns the corresponding indices. | |
Args: | |
faiss_index (faiss.Index): Faiss index instance | |
""" | |
stores_text: bool = False | |
def __init__( | |
self, | |
faiss_index: Any, | |
) -> None: | |
"""Initialize params.""" | |
import_err_msg = """ | |
`faiss` package not found. For instructions on | |
how to install `faiss` please visit | |
https://github.com/facebookresearch/faiss/wiki/Installing-Faiss | |
""" | |
try: | |
import faiss # noqa: F401 | |
except ImportError: | |
raise ImportError(import_err_msg) | |
self._faiss_index = cast(faiss.Index, faiss_index) | |
def config_dict(self) -> dict: | |
"""Return config dict.""" | |
return {} | |
def add( | |
self, | |
embedding_results: List[NodeEmbeddingResult], | |
) -> List[str]: | |
"""Add embedding results to index. | |
NOTE: in the Faiss vector store, we do not store text in Faiss. | |
Args | |
embedding_results: List[NodeEmbeddingResult]: list of embedding results | |
""" | |
new_ids = [] | |
for result in embedding_results: | |
text_embedding = result.embedding | |
text_embedding_np = np.array(text_embedding, dtype="float32")[np.newaxis, :] | |
new_id = str(self._faiss_index.ntotal) | |
self._faiss_index.add(text_embedding_np) | |
new_ids.append(new_id) | |
return new_ids | |
def client(self) -> Any: | |
"""Return the faiss index.""" | |
return self._faiss_index | |
def load(cls, save_path: str) -> "FaissVectorStore": | |
"""Load vector store from disk. | |
Args: | |
save_path (str): The save_path of the file. | |
Returns: | |
FaissVectorStore: The loaded vector store. | |
""" | |
import faiss | |
faiss_index = faiss.read_index(save_path) | |
return cls(faiss_index) | |
def save( | |
self, | |
save_path: str, | |
) -> None: | |
"""Save to file. | |
This method saves the vector store to disk. | |
Args: | |
save_path (str): The save_path of the file. | |
""" | |
import faiss | |
faiss.write_index(self._faiss_index, save_path) | |
def delete(self, doc_id: str, **delete_kwargs: Any) -> None: | |
"""Delete a document. | |
Args: | |
doc_id (str): document id | |
""" | |
raise NotImplementedError("Delete not yet implemented for Faiss index.") | |
def query( | |
self, | |
query_embedding: List[float], | |
similarity_top_k: int, | |
doc_ids: Optional[List[str]] = None, | |
) -> VectorStoreQueryResult: | |
"""Query index for top k most similar nodes. | |
Args: | |
query_embedding (List[float]): query embedding | |
similarity_top_k (int): top k most similar nodes | |
""" | |
query_embedding_np = np.array(query_embedding, dtype="float32")[np.newaxis, :] | |
dists, indices = self._faiss_index.search(query_embedding_np, similarity_top_k) | |
dists = [d[0] for d in dists] | |
# if empty, then return an empty response | |
if len(indices) == 0: | |
return VectorStoreQueryResult(similarities=[], ids=[]) | |
# returned dimension is 1 x k | |
node_idxs = list([str(i) for i in indices[0]]) | |
return VectorStoreQueryResult(similarities=dists, ids=node_idxs) | |