Spaces:
Runtime error
Runtime error
File size: 2,412 Bytes
6a57640 1e53020 6a57640 1e53020 6a57640 1e53020 6a57640 1e53020 6a57640 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
from typing import List
from typing import Type
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings.base import Embeddings
from langchain.vectorstores import VectorStore
from langchain.vectorstores.faiss import FAISS
from knowledge_gpt.core.debug import FakeEmbeddings
from knowledge_gpt.core.debug import FakeVectorStore
from knowledge_gpt.core.parsing import File
class FolderIndex:
    """A vector index built over the documents of a group of files (a folder).

    Attributes are set in ``__init__``:
      * ``name``  - label of the index, always initialised to ``"default"``.
      * ``files`` - the files the index was built from.
      * ``index`` - the vector store holding the embedded documents.
    """

    def __init__(self, files: List[File], index: VectorStore):
        """Store the source files together with their ready-made vector store."""
        self.files = files
        self.index: VectorStore = index
        self.name: str = "default"

    @staticmethod
    def _combine_files(files: List[File]) -> List[Document]:
        """Flatten the documents of every file into one list.

        Each document's ``metadata`` is updated IN PLACE with the owning
        file's ``name`` (key ``"file_name"``) and ``id`` (key ``"file_id"``),
        so the origin of a document can be recovered from the document itself.
        Documents are returned in file order, then in per-file order.
        """
        merged: List[Document] = []
        for source in files:
            for document in source.docs:
                # Tag the document with its origin before collecting it.
                document.metadata.update(file_name=source.name, file_id=source.id)
                merged.append(document)
        return merged

    @classmethod
    def from_files(
        cls,
        files: List[File],
        embeddings: Embeddings,
        vector_store: Type[VectorStore],
    ) -> "FolderIndex":
        """Build a FolderIndex by embedding every document of ``files``.

        Args:
            files: Files whose documents are indexed.
            embeddings: Embedding object handed to the vector store.
            vector_store: Vector store class; its ``from_documents``
                constructor is used to create the index.

        Returns:
            A new ``FolderIndex`` wrapping ``files`` and the created store.
        """
        documents = cls._combine_files(files)
        store = vector_store.from_documents(documents=documents, embedding=embeddings)
        return cls(files=files, index=store)
def embed_files(
    files: List[File], embedding: str, vector_store: str, **kwargs
) -> FolderIndex:
    """Embed ``files`` and return them wrapped in a FolderIndex.

    Args:
        files: Files whose documents should be embedded and indexed.
        embedding: Name of the embedding backend: ``"openai"`` or ``"debug"``.
        vector_store: Name of the vector store backend: ``"faiss"`` or
            ``"debug"``.
        **kwargs: Forwarded unchanged to the embedding class constructor.

    Returns:
        The ``FolderIndex`` produced by ``FolderIndex.from_files``.

    Raises:
        NotImplementedError: If ``embedding`` or ``vector_store`` is not one
            of the supported names.
    """
    embedding_classes: dict[str, Type[Embeddings]] = {
        "openai": OpenAIEmbeddings,
        "debug": FakeEmbeddings,
    }
    store_classes: dict[str, Type[VectorStore]] = {
        "faiss": FAISS,
        "debug": FakeVectorStore,
    }

    # The embedding is validated and instantiated before the vector store
    # name is even looked at, so an invalid embedding is reported first.
    if embedding not in embedding_classes:
        raise NotImplementedError(f"Embedding {embedding} not supported.")
    embedder = embedding_classes[embedding](**kwargs)

    if vector_store not in store_classes:
        raise NotImplementedError(f"Vector store {vector_store} not supported.")

    return FolderIndex.from_files(
        files=files,
        embeddings=embedder,
        vector_store=store_classes[vector_store],
    )
|