Spaces:
Running
Running
from sentence_transformers import SentenceTransformer | |
import numpy as np | |
from typing import Sequence | |
import faiss | |
class Vectorizer: | |
def __init__(self, model) -> None:
    """Initialize the vectorizer with a pre-trained embedding model.

    Args:
        model: The pre-trained embedding model used to transform prompts.
            When it exposes ``get_sentence_embedding_dimension()`` (as
            ``SentenceTransformer`` does), that value is used as the
            dimensionality of the FAISS index.
    """
    self.model = model
    # Kept so existing readers of this attribute keep working; it also
    # serves as the fallback dimension when the model reports none.
    self.index_size = 50000
    # faiss.IndexFlatIP expects the vector dimensionality, not a capacity,
    # and adding vectors of a different width fails. Size the index from
    # the model's embedding dimension whenever the model can report it.
    dimension_getter = getattr(model, "get_sentence_embedding_dimension", None)
    dimension = dimension_getter() if callable(dimension_getter) else None
    if dimension is None:
        dimension = self.index_size
    self.index = faiss.IndexFlatIP(int(dimension))
    # Presumably maps FAISS row positions to retrieval-DB indices; it is
    # only initialized here -- confirm against the code that fills it.
    self.cached_index_idx_to_retrieval_db_idx = []
def transform_and_add_to_index(self, prompts: Sequence[str]) -> np.ndarray: | |
"""Transform texts into numerical vectors using the specified model. | |
Args: prompts: The sequence of raw corpus prompts. Returns: Vectorized prompts | |
""" | |
embeddings = self.model.encode(prompts) | |
embedding_dimension = embeddings.shape[1] | |
print('Embedding dimension:', embedding_dimension) | |
self.index.add(np.array(embeddings)) | |