# Spaces: Sleeping
from langchain_community.document_loaders import UnstructuredURLLoader | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain_community.document_loaders import PyPDFDirectoryLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings | |
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint | |
class Data:
    """Build a FAISS similarity retriever over the content of a set of URLs.

    The pages are loaded with ``UnstructuredURLLoader``, embedded with the
    ``BAAI/bge-small-en-v1.5`` model on CPU (normalized embeddings), and
    indexed with ``FAISS``.
    """

    def __init__(self, urls):
        """Remember the source URLs and create the embedding model.

        Args:
            urls: URLs passed to ``UnstructuredURLLoader`` each time
                :meth:`retriever` is called.
        """
        self.urls = urls
        ## Embedding Using Huggingface
        self.huggingface_embeddings = HuggingFaceBgeEmbeddings(
            # Alternative model: sentence-transformers/all-MiniLM-l6-v2
            model_name="BAAI/bge-small-en-v1.5",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )

    def retriever(self):
        """Load the URLs, index them in FAISS, and return a retriever.

        Every call loads the URLs again and builds a new vector store.

        Returns:
            The object produced by ``vectorstore.as_retriever`` configured
            for similarity search returning the top 3 matches.

        Raises:
            ValueError: If no URLs were configured, or if the loader
                returned no documents.
        """
        # Fail early with a clear message: an index cannot be built from
        # nothing, and the downstream failure would be hard to interpret.
        if not self.urls:
            raise ValueError("No URLs provided; cannot build a retriever.")

        loader = UnstructuredURLLoader(urls=self.urls)
        data = loader.load()
        if not data:
            raise ValueError(
                "No documents could be loaded from the provided URLs; "
                "cannot build a retriever."
            )

        # NOTE(review): documents are embedded whole, without chunking.
        # Long pages may exceed the embedding model's input limit -- confirm
        # whether a text splitter should be applied before indexing.
        ## VectorStore Creation
        vectorstore = FAISS.from_documents(data, self.huggingface_embeddings)
        return vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )