Spaces:

Dekode
/

DocLLM

Sleeping

App Files Community

Dekode committed on Feb 4, 2024

Commit

923a652

verified ·

1 Parent(s): 779a470

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -8

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import streamlit as st
 from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
 from langchain.text_splitter import CharacterTextSplitter
@@ -10,7 +11,7 @@ from langchain_community.llms import HuggingFaceHub
 def make_vectorstore(embeddings):
     loader = PyPDFDirectoryLoader("data")
     documents = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
     texts = text_splitter.split_documents(documents)
     docsearch = FAISS.from_documents(texts, embeddings)
@@ -36,6 +37,9 @@ def get_response(conversation_chain, query):
 def main():
     st.title("Chat LLM")
     print("Downloading Embeddings Model")
     with st.spinner('Downloading Embeddings Model...'):
@@ -43,7 +47,7 @@ def main():
     print("Loading LLM from HuggingFace")
     with st.spinner('Loading LLM from HuggingFace...'):
-        llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.7, "max_new_tokens":512, "top_p":0.95, "top_k":50},)
     # multiple pdfs uploader in the side bar
     st.sidebar.title("Upload PDFs")
@@ -52,10 +56,16 @@ def main():
         for file in uploaded_files:
             with open(f"data/{file.name}", "wb") as f:
                 f.write(file.getbuffer())
         st.sidebar.success("PDFs uploaded successfully")
     else:
         st.sidebar.warning("Please upload PDFs")
-    # add a clear chat button which will clear the session state and the conversation history
     if "messages" not in st.session_state:
         st.session_state.messages = []
@@ -66,11 +76,6 @@ def main():
         else:
             st.chat_message("bot").markdown(message["content"])
-    with st.spinner('making a vectorstore database...'):
-        vectorstore = make_vectorstore(embeddings)
-    with st.spinner('making a conversation chain...'):
-        conversation_chain = get_conversation(vectorstore, llm)
     user_prompt = st.chat_input("ask a question", key="user")
     if user_prompt:
         st.chat_message("user").markdown(user_prompt)

+import os
 import streamlit as st
 from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
 from langchain.text_splitter import CharacterTextSplitter
 def make_vectorstore(embeddings):
     loader = PyPDFDirectoryLoader("data")
     documents = loader.load()
+    text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=0)
     texts = text_splitter.split_documents(documents)
     docsearch = FAISS.from_documents(texts, embeddings)
 def main():
     st.title("Chat LLM")
+    # create a folder named data
+    if not os.path.exists("data"):
+        os.makedirs("data")
     print("Downloading Embeddings Model")
     with st.spinner('Downloading Embeddings Model...'):
     print("Loading LLM from HuggingFace")
     with st.spinner('Loading LLM from HuggingFace...'):
+        llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.7, "max_new_tokens":512, "top_p":0.95, "top_k":50})
     # multiple pdfs uploader in the side bar
     st.sidebar.title("Upload PDFs")
         for file in uploaded_files:
             with open(f"data/{file.name}", "wb") as f:
                 f.write(file.getbuffer())
+        with st.spinner('making a vectorstore database...'):
+            vectorstore = make_vectorstore(embeddings)
+        with st.spinner('making a conversation chain...'):
+            conversation_chain = get_conversation(vectorstore, llm)
         st.sidebar.success("PDFs uploaded successfully")
     else:
         st.sidebar.warning("Please upload PDFs")
+    # add a clear chat button which will clear the session state
+    if st.button("Clear Chat"):
+        st.session_state.messages = []
     if "messages" not in st.session_state:
         st.session_state.messages = []
         else:
             st.chat_message("bot").markdown(message["content"])
     user_prompt = st.chat_input("ask a question", key="user")
     if user_prompt:
         st.chat_message("user").markdown(user_prompt)