"""Index a fixed set of PDF files into a Qdrant collection.

The script loads each PDF, splits the pages into overlapping character
chunks, embeds the chunks with a HuggingFace sentence-transformers model and
uploads them to the Qdrant instance described by the ``QDRANT_URL`` and
``QDRANT_API_KEY`` environment variables (read from the process environment
after ``load_dotenv()`` has been called).
"""

import os

from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

load_dotenv()

COLLECTION_NAME = "multidoc-rag-agent"
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
# Must equal the output dimensionality of EMBEDDING_MODEL; the
# all-mpnet-base-v2 model card documents 768-dimensional vectors.
EMBEDDING_DIM = 768
PDF_PATHS = ("sam-a16.pdf", "sam-s25.pdf", "sam-fold.pdf")

# NOTE(review): neither variable is validated here; if they are unset,
# None is passed straight to QdrantClient -- confirm that is intended.
url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")
client = QdrantClient(url=url, api_key=api_key)

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Load every PDF and pool the resulting documents into a single list,
# preserving the order of PDF_PATHS.
docs = []
for pdf_path in PDF_PATHS:
    docs.extend(PyPDFLoader(pdf_path).load())

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

# Creating a collection that already exists is an error, so only create it
# on the first run; later runs reuse the existing collection.
if not client.collection_exists(COLLECTION_NAME):
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )

print(f"Split blog post into {len(all_splits)} sub-documents.")

vector_store = QdrantVectorStore(
    client=client,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
)
# NOTE(review): re-running the script adds the same chunks again;
# presumably this produces duplicate points -- confirm whether the
# collection should be cleared or deterministic ids supplied.
vector_store.add_documents(all_splits)
print("Documents stored in Qdrant.")