Nikhil0987 committed on
Commit
55ae313
1 Parent(s): 56f4a05

Update ingest.py

Browse files
Files changed (1) hide show
  1. ingest.py +21 -21
ingest.py CHANGED
@@ -1,27 +1,27 @@
1
- from langchain_community.embeddings import HuggingFaceEmbeddings
2
- from langchain_community.vectorstores import FAISS
3
- from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
- from langchain.text_splitter import RecursiveCharacterTextSplitter
5
 
6
- DATA_PATH = 'data/'
7
- DB_FAISS_PATH = 'vectorstore/db_faiss'
8
 
9
- # Create vector database
10
- def create_vector_db():
11
- loader = DirectoryLoader(DATA_PATH,
12
- glob='*.pdf',
13
- loader_cls=PyPDFLoader)
14
 
15
- documents = loader.load()
16
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
17
- chunk_overlap=50)
18
- texts = text_splitter.split_documents(documents)
19
 
20
- embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
21
- model_kwargs={'device': 'cpu'})
22
 
23
- db = FAISS.from_documents(texts, embeddings)
24
- db.save_local(DB_FAISS_PATH)
25
 
26
- if __name__ == "__main__":
27
- create_vector_db()
 
1
+ # from langchain_community.embeddings import HuggingFaceEmbeddings
2
+ # from langchain_community.vectorstores import FAISS
3
+ # from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
5
 
6
+ # DATA_PATH = 'data/'
7
+ # DB_FAISS_PATH = 'vectorstore/db_faiss'
8
 
9
+ # # Create vector database
10
+ # def create_vector_db():
11
+ # loader = DirectoryLoader(DATA_PATH,
12
+ # glob='*.pdf',
13
+ # loader_cls=PyPDFLoader)
14
 
15
+ # documents = loader.load()
16
+ # text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
17
+ # chunk_overlap=50)
18
+ # texts = text_splitter.split_documents(documents)
19
 
20
+ # embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
21
+ # model_kwargs={'device': 'cpu'})
22
 
23
+ # db = FAISS.from_documents(texts, embeddings)
24
+ # db.save_local(DB_FAISS_PATH)
25
 
26
+ # if __name__ == "__main__":
27
+ # create_vector_db()