File size: 1,209 Bytes
6d38d15
 
e42468d
 
 
 
 
 
 
 
6d38d15
e42468d
 
 
 
 
 
 
 
6d38d15
e42468d
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from langchain_community.document_loaders import UnstructuredURLLoader

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

class Data:
    """Build a FAISS-backed retriever over the content of a set of web pages.

    Pages are loaded with ``UnstructuredURLLoader``, embedded with a
    HuggingFace BGE embedding object and indexed with ``FAISS``.
    """

    def __init__(self, urls):
        """Store the source URLs and instantiate the embedding object.

        Args:
            urls: Collection of URL strings passed to ``UnstructuredURLLoader``.
        """
        self.urls = urls
        # Embedding using HuggingFace
        self.huggingface_embeddings = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-small-en-v1.5",  # alternative: sentence-transformers/all-MiniLM-l6-v2
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        # Holds ``(urls_snapshot, retriever)`` once the index has been built.
        self._retriever_cache = None

    @property
    def retriever(self):
        """Return a similarity retriever (``k=3``) over the loaded pages.

        Loading the URLs, embedding the documents and building the FAISS
        index is expensive, so the result is cached on the instance. The
        cache is keyed on a snapshot of ``self.urls``: reassigning or
        mutating ``urls`` causes the index to be rebuilt on the next access,
        while repeated access with unchanged URLs returns the same retriever
        object without re-fetching anything.
        """
        urls_snapshot = tuple(self.urls)

        # ``getattr`` keeps this working for instances created without
        # ``__init__`` (e.g. via ``__new__`` or unpickling older objects).
        cached = getattr(self, "_retriever_cache", None)
        if cached is not None and cached[0] == urls_snapshot:
            return cached[1]

        loader = UnstructuredURLLoader(urls=list(urls_snapshot))
        data = loader.load()

        # Vector store creation
        vectorstore = FAISS.from_documents(data, self.huggingface_embeddings)

        retriever = vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )

        self._retriever_cache = (urls_snapshot, retriever)
        return retriever