|
from langchain_community.document_loaders import PyMuPDFLoader |
|
from langchain_community.vectorstores import FAISS |
|
from config import embeddings |
|
|
|
def create_vectoreDB(file_path: str):
    """Build a FAISS vector store from a PDF and save it under ``vectors/``.

    The PDF is loaded with ``PyMuPDFLoader``, a few layout artifacts are
    stripped from each page's text, the pages are embedded with the
    ``embeddings`` object imported from ``config``, and the resulting index
    is written to disk with ``save_local``.

    Args:
        file_path: Path to the source PDF, e.g. ``"data/report.pdf"``. A
            leading ``data/`` and a trailing ``.pdf`` are dropped to form the
            output location, so that example is saved to ``vectors/report``.

    Returns:
        ``{"status": "completed"}`` on success, or ``None`` if any step
        fails (the error is printed rather than raised).
    """
    try:
        documents = PyMuPDFLoader(file_path=file_path).load()

        if not documents:
            # Nothing to index: say so explicitly instead of surfacing
            # whatever error an empty document list triggers downstream.
            print(f"No pages could be loaded from: {file_path}")
            return None

        for doc in documents:
            # NOTE(review): replace(" ", "") removes every space from the
            # page text, not just runs of whitespace -- confirm this is
            # intended before the text is embedded.
            doc.page_content = (
                doc.page_content
                .replace("\n \n", "")
                .replace(" ", "")
                .replace("----", "")
                .replace("====", "")
            )

        vectorstore = FAISS.from_documents(documents, embedding=embeddings)

        # Strip only a leading "data/" and a trailing ".pdf". A blanket
        # str.replace would also eat those substrings in the middle of the
        # path (e.g. "metadata/" or "v1.pdf.backup").
        name = str(file_path)
        if name.startswith("data/"):
            name = name[len("data/"):]
        if name.endswith(".pdf"):
            name = name[:-len(".pdf")]
        path = f"vectors/{name}"

        vectorstore.save_local(path)

        print(f"VectoreStore has been created at: {path}")
        return {"status": "completed"}

    except Exception as e:
        # Best-effort boundary: callers receive None on failure. Include the
        # exception type and the offending file so the message is actionable
        # (str(e) alone can be as terse as "'key'" or "0").
        print(
            f"Failed to create vector store for {file_path!r}: "
            f"{type(e).__name__}: {e}"
        )
        return None
|
|
|
|
|
# Runs at module top level: build the vector store for the knowledge-base PDF.
create_vectoreDB(file_path="data/Oldcastle-KnowldgeBase.pdf")