Spaces:
Running
Running
File size: 1,209 Bytes
6d38d15 e42468d 6d38d15 e42468d 6d38d15 e42468d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
class Data:
    """Build a FAISS-backed similarity retriever over documents loaded from URLs.

    The documents are fetched with ``UnstructuredURLLoader`` and embedded with
    the ``BAAI/bge-small-en-v1.5`` model on CPU, using normalized embeddings.

    Attributes:
        urls: The URLs whose content is loaded into the vector store.
        huggingface_embeddings: The embedding model used to index documents.
    """

    def __init__(self, urls):
        """Store the source URLs and create the embedding model.

        Args:
            urls: Iterable of URL strings handed to ``UnstructuredURLLoader``.
        """
        self.urls = urls
        # Embedding using Hugging Face.
        self.huggingface_embeddings = HuggingFaceBgeEmbeddings(
            # Alternative model: sentence-transformers/all-MiniLM-l6-v2
            model_name="BAAI/bge-small-en-v1.5",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
        # Lazily-built retriever plus the URL snapshot it was built from.
        self._retriever = None
        self._retriever_urls = None

    @property
    def retriever(self):
        """Return a similarity retriever (``k=3``) over the loaded URL documents.

        Loading the URLs, embedding the documents and building the FAISS index
        is expensive, so the retriever is built on first access and reused on
        later accesses. It is rebuilt only if ``self.urls`` has changed since
        the cached retriever was created.
        """
        # Snapshot the URLs so in-place mutation of the list is also detected.
        urls_snapshot = tuple(self.urls)
        cached = getattr(self, "_retriever", None)
        cached_urls = getattr(self, "_retriever_urls", None)
        if cached is not None and cached_urls == urls_snapshot:
            return cached

        loader = UnstructuredURLLoader(urls=self.urls)
        documents = loader.load()

        # Vector store creation.
        vectorstore = FAISS.from_documents(documents, self.huggingface_embeddings)
        built = vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )

        self._retriever = built
        self._retriever_urls = urls_snapshot
        return built
|