from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint


class Data:
    """Expose a FAISS similarity retriever built from a set of URLs.

    The embedding model is created once in ``__init__``; the documents
    are loaded and indexed each time ``retriever`` is read.
    """

    def __init__(self, urls):
        """Remember the source URLs and create the embedding model.

        Args:
            urls: Value passed as ``urls=`` to ``UnstructuredURLLoader``
                when ``retriever`` is accessed.
        """
        self.urls = urls

        # Hugging Face BGE embeddings: CPU device, normalized embeddings.
        # Alternative model noted in the original source:
        # sentence-transformers/all-MiniLM-l6-v2
        model_options = {"device": "cpu"}
        encode_options = {"normalize_embeddings": True}
        self.huggingface_embeddings = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-small-en-v1.5",
            model_kwargs=model_options,
            encode_kwargs=encode_options,
        )

    @property
    def retriever(self):
        """Load the URLs, index them in FAISS, and return a retriever.

        Nothing is cached here: every read of this property runs the
        loader again and builds a new FAISS store, so callers that need
        the retriever more than once should keep a reference to it.
        Documents are indexed exactly as the loader returns them (no
        text splitting is applied in this class).

        Returns:
            The object returned by ``FAISS.as_retriever`` configured
            with ``search_type="similarity"`` and ``k=3``.
        """
        documents = UnstructuredURLLoader(urls=self.urls).load()

        # Vector store creation from the freshly loaded documents.
        store = FAISS.from_documents(documents, self.huggingface_embeddings)
        return store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )