Chatbot / embeddings.py
Poonawala's picture
Update embeddings.py
1534ef4 verified
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from config import embeddings
def create_vectoreDB(file_path:str):
try:
loader = PyMuPDFLoader(file_path=file_path)
documents = loader.load()
# Process the text to remove "\n\n" and " "
for doc in documents:
doc.page_content = doc.page_content.replace("\n \n", "").replace(" ", "").replace("----", "").replace("====", "")
vectorstore = FAISS.from_documents(
documents,
embedding=embeddings
)
path = f"vectors/{file_path}".replace(".pdf","").replace("data/","")
vectorstore.save_local(path)
print(f"VectoreStore has been created at: {path}")
return {"status": "completed"}
except Exception as e:
print(str(e))
return None
create_vectoreDB("data/Oldcastle-KnowldgeBase.pdf")