"""Streamlit page that indexes a web page into the session's vector store.

The user submits a web link through a form; the page is downloaded, split
into chunks, and the chunks are added to the vector store kept in
``st.session_state['vectorstore']``.
"""
import streamlit as st
#from langchain.retrievers import KNNRetriever
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import FAISS
#from streamapp import *
from PIL import Image
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

st.sidebar.image(Image.open("./test-logo.png"), use_column_width=True)
print("Loading Index Page!!")

# This page only adds documents to an already existing vector store; it does
# not create one. Stop with a visible message instead of a KeyError when the
# store is missing from the session.
# NOTE(review): an earlier commented-out branch called initialize_vectorstore()
# here (presumably from streamapp) -- confirm whether that fallback should
# be restored.
if "vectorstore" not in st.session_state:
    st.warning(
        "Vector store is not initialized for this session, "
        "so documents cannot be indexed yet."
    )
    st.stop()
vectorstore = st.session_state["vectorstore"]


def _text_splitter(doc):
    """Split documents into overlapping chunks.

    Args:
        doc: The documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.transform_documents``.

    Returns:
        The chunked documents: at most 600 characters per chunk (measured
        with ``len``), with a 50-character overlap between chunks.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=50,
        length_function=len,
    )
    return text_splitter.transform_documents(doc)


def _load_docs(path: str):
    """Download the web page at ``path`` and return it as split chunks.

    Args:
        path: The web link handed to ``WebBaseLoader``.

    Returns:
        The loaded page after it has been passed through ``_text_splitter``.
    """
    load_doc = WebBaseLoader(path).load()
    return _text_splitter(load_doc)


with st.form("Index documents to Vector Store"):
    file_path = st.text_input(
        label="Enter the web link",
        value="",
        placeholder="",
        label_visibility="visible",
        disabled=False,
    )
    print("file_path ", file_path)
    submitted = st.form_submit_button("Submit")
    if submitted:
        web_link = file_path.strip()
        if not web_link:
            # Nothing to fetch; do not hand an empty URL to the loader.
            st.warning("Please enter a web link before submitting.")
        else:
            st.write("Submitted web link: " + web_link)
            # _load_docs already returns split chunks, so they are added
            # as-is rather than being split a second time.
            webpage_chunks = _load_docs(web_link)

            # store embeddings in vector store
            print(
                "vectorstore length before addition, ",
                len(vectorstore.serialize_to_bytes()),
            )
            vectorstore.add_documents(webpage_chunks)
            print(
                "vectorstore length after addition, ",
                len(vectorstore.serialize_to_bytes()),
            )

            # Publish the updated store and a retriever built from it for
            # whatever else reads this session state.
            st.session_state["vectorstore"] = vectorstore
            retriever = vectorstore.as_retriever()
            st.session_state["retriever"] = retriever
            st.session_state["docadd"] = 1

            # The message keeps its surrounding blank lines; the literal is
            # written with explicit escapes so it is valid Python.
            st.markdown(
                "\n\nDocument loaded to vector store successfully!!\n\n",
                unsafe_allow_html=True,
            )