File size: 1,363 Bytes
39de480
 
 
 
d35553d
39de480
d35553d
39de480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95390bd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
# from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores.faiss import FAISS

# Name of the embedding model handed to HuggingFaceEmbeddings.
EMBEDDINGS_MODEL_NAME = "all-MiniLM-L6-v2"
embeddings_model_name = EMBEDDINGS_MODEL_NAME

# Settings passed to RecursiveCharacterTextSplitter when building the index.
chunk_size = 1000
chunk_overlap = 50

# Directory settings. NOTE(review): neither name is referenced by the
# functions visible in this part of the file -- confirm they are used elsewhere.
persist_directory = "data/cbsl"
index_path = persist_directory

# Embedding model instance created once at import time; load_FAISS_store()
# passes it to FAISS.load_local.
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)


def create_faiss(source_dir="CBSL", index_dir="faiss_index"):
    """Build a FAISS index from the PDFs in a directory and save it to disk.

    The documents found by ``DirectoryLoader`` under *source_dir* are loaded
    with ``PyPDFLoader``, split with ``RecursiveCharacterTextSplitter`` using
    the module-level ``chunk_size`` and ``chunk_overlap`` values, embedded
    with the module-level ``embeddings`` model, and written to *index_dir*
    with ``FAISS.save_local``.

    Args:
        source_dir: Directory to load PDF documents from. Defaults to
            ``"CBSL"``, the directory this function has always read.
        index_dir: Folder the index is saved to. Defaults to
            ``"faiss_index"``, the folder this function has always written.

    Returns:
        None. The index is persisted to *index_dir* as a side effect.

    NOTE(review): the default *index_dir* is not the folder that
    ``load_FAISS_store`` reads by default
    (``"faiss_index_with_year_2000_chunk"``), so an index built with the
    defaults is not the one that gets loaded -- confirm this is intended.
    """
    documents = DirectoryLoader(source_dir, loader_cls=PyPDFLoader).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    # Reuse the module-level embedding model rather than constructing a second
    # instance of the same model; this also guarantees the index is built with
    # the exact model object that load_FAISS_store() uses for loading.
    vectorstore = FAISS.from_documents(texts, embeddings)
    vectorstore.save_local(index_dir)


def load_FAISS_store(index_dir="faiss_index_with_year_2000_chunk"):
    """Load a saved FAISS index from disk and return it.

    Args:
        index_dir: Folder containing the saved index. Defaults to
            ``"faiss_index_with_year_2000_chunk"``, the folder this function
            has always loaded.

    Returns:
        The vector store returned by ``FAISS.load_local``, loaded with the
        module-level ``embeddings`` model.

    NOTE(review): ``FAISS.load_local`` reads a pickled docstore from the
    index folder -- only point this at index folders from a trusted source.
    """
    store = FAISS.load_local(index_dir, embeddings)
    # Announce success only after the load has actually completed, so a
    # failed load no longer prints a misleading "loaded" message first.
    print(f"> {index_dir} loaded")
    return store