autodocs / data.py
moctardiallo's picture
Use similarity retriever to provide context for '.respond'
ef93b68
raw
history blame
1.21 kB
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
class Data:
    """Expose the content behind a collection of URLs as a similarity retriever.

    Documents are loaded with ``UnstructuredURLLoader``, embedded with the
    ``BAAI/bge-small-en-v1.5`` model on CPU (normalized embeddings), and
    indexed in a FAISS vector store.
    """

    def __init__(self, urls):
        """Remember the source URLs and configure the embedding model.

        Args:
            urls: The URLs passed to ``UnstructuredURLLoader`` when the
                retriever is built.
        """
        self.urls = urls
        # Embedding using Hugging Face.
        # Alternative model: sentence-transformers/all-MiniLM-l6-v2
        self.huggingface_embeddings = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-small-en-v1.5",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
        # Lazily built retriever and the URL snapshot it was built from.
        self._retriever = None
        self._retriever_urls = None

    @property
    def retriever(self):
        """Return a similarity retriever (``k=3``) over the configured URLs.

        Building the retriever loads every URL and embeds the resulting
        documents, which is far too expensive to repeat on each attribute
        access. The retriever is therefore built on first access and reused
        until ``self.urls`` changes. The indexed content is a snapshot taken
        at build time; assign a new value to ``self.urls`` to force a rebuild.

        Returns:
            The retriever produced by ``FAISS.as_retriever`` with
            ``search_type="similarity"`` and ``search_kwargs={"k": 3}``.
        """
        urls = self.urls
        # Snapshot lists/tuples so in-place mutation of ``self.urls`` is
        # detected; other values are compared as-is (never iterated here, so
        # one-shot iterables are not consumed before reaching the loader).
        snapshot = tuple(urls) if isinstance(urls, (list, tuple)) else urls

        # ``getattr`` defaults keep this working on instances that were
        # created without running ``__init__``.
        cached = getattr(self, "_retriever", None)
        if cached is not None and getattr(self, "_retriever_urls", None) == snapshot:
            return cached

        loader = UnstructuredURLLoader(urls=urls)
        documents = loader.load()

        # VectorStore creation.
        # NOTE(review): documents are indexed exactly as loaded; no text
        # splitter is applied before embedding — confirm this is intended.
        vectorstore = FAISS.from_documents(documents, self.huggingface_embeddings)
        built = vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )

        self._retriever = built
        self._retriever_urls = snapshot
        return built