Spaces:
Runtime error
Runtime error
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings | |
# from langchain.docstore.document import Document | |
from langchain.document_loaders import PyPDFLoader | |
# from langchain.document_loaders import TextLoader | |
from langchain.document_loaders import DirectoryLoader | |
from langchain.vectorstores.faiss import FAISS | |
EMBEDDINGS_MODEL_NAME="all-MiniLM-L6-v2" | |
embeddings_model_name =EMBEDDINGS_MODEL_NAME | |
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name) | |
persist_directory = "data/cbsl" | |
index_path = persist_directory | |
chunk_size=1000 | |
chunk_overlap=50 | |
def create_faiss(): | |
# documents = DirectoryLoader(persist_directory, loader_cls=PyMuPDFLoader).load() | |
documents = DirectoryLoader("CBSL", loader_cls=PyPDFLoader).load() | |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) | |
texts = text_splitter.split_documents(documents) | |
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name) | |
vectorstore = FAISS.from_documents(texts, embeddings) | |
vectorstore.save_local("faiss_index") | |
def load_FAISS_store(): | |
print("> faiss_index_with_year_2000_chunk loaded") | |
return FAISS.load_local("faiss_index_with_year_2000_chunk", embeddings) | |