Spaces:
Sleeping
Sleeping
Syed Junaid Iqbal
committed on
Commit
•
d77386f
1
Parent(s):
2db6c26
Update app.py
Browse files
app.py
CHANGED
@@ -51,8 +51,8 @@ def get_text_chunks(documents):
|
|
51 |
IMPORTANT : If the chunks too small we will miss the context and if its too large we will have longer compute time
|
52 |
"""
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
54 |
-
chunk_size=
|
55 |
-
chunk_overlap=
|
56 |
)
|
57 |
|
58 |
st.session_state.text_chunks = text_splitter.split_documents(documents)
|
@@ -82,9 +82,8 @@ def get_conversation_chain():
|
|
82 |
|
83 |
llm = LlamaCpp(model_path= model_path,
|
84 |
n_ctx=4000,
|
85 |
-
max_tokens=
|
86 |
n_gpu_layers = 40,
|
87 |
-
n_batch = 512,
|
88 |
callback_manager = callback_manager,
|
89 |
verbose=True)
|
90 |
|
@@ -109,7 +108,7 @@ def get_conversation_chain():
|
|
109 |
|
110 |
rag_prompt_custom = PromptTemplate.from_template(prompt_template)
|
111 |
|
112 |
-
prompt = hub.pull("rlm/rag-prompt")
|
113 |
|
114 |
conversation_chain = RetrievalQA.from_chain_type(
|
115 |
llm,
|
@@ -158,8 +157,8 @@ def add_rounded_edges(image_path="./randstad_featuredimage.png", radius=30):
|
|
158 |
st.image(image_path, use_column_width=True, output_format='auto')
|
159 |
|
160 |
|
161 |
-
# Delete
|
162 |
-
def
|
163 |
|
164 |
# Check if the directory exists
|
165 |
if os.path.exists(directory_path) and len(os.listdir(directory_path)) > 0:
|
@@ -177,7 +176,6 @@ def delete_db(directory_path = './vectordb/'):
|
|
177 |
print(f"The directory {directory_path} does not exist.")
|
178 |
|
179 |
|
180 |
-
|
181 |
def save_uploaded_file(uploaded_file):
|
182 |
save_directory = "./documents/"
|
183 |
file_path = os.path.join(save_directory, uploaded_file.name)
|
@@ -202,7 +200,7 @@ def load_dependencies():
|
|
202 |
|
203 |
def main():
|
204 |
load_dotenv()
|
205 |
-
st.set_page_config(page_title="
|
206 |
page_icon=":books:")
|
207 |
st.write(css, unsafe_allow_html=True)
|
208 |
|
@@ -220,7 +218,7 @@ def main():
|
|
220 |
|
221 |
|
222 |
# Embedding Model
|
223 |
-
st.session_state.embeddings = FastEmbedEmbeddings( model_name= "BAAI/bge-
|
224 |
cache_dir="./embedding_model/")
|
225 |
|
226 |
with st.sidebar:
|
@@ -242,7 +240,10 @@ def main():
|
|
242 |
if st.button("Process"):
|
243 |
|
244 |
# delete the old embeddings
|
245 |
-
|
|
|
|
|
|
|
246 |
|
247 |
# then Embedd new documents
|
248 |
with st.spinner("Processing"):
|
@@ -252,13 +253,13 @@ def main():
|
|
252 |
for file in docs:
|
253 |
save_uploaded_file(file)
|
254 |
|
255 |
-
|
256 |
-
using the helper function below lets load our dependencies
|
257 |
-
Step 1 : Load the documents
|
258 |
-
Step 2 : Break them into Chunks
|
259 |
-
Step 3 : Create Embeddings and save them to Vector DB
|
260 |
-
Step 4 : Get our conversation chain
|
261 |
-
|
262 |
load_dependencies()
|
263 |
|
264 |
# Load our model
|
|
|
51 |
IMPORTANT : If the chunks too small we will miss the context and if its too large we will have longer compute time
|
52 |
"""
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
54 |
+
chunk_size= 400,
|
55 |
+
chunk_overlap=50,
|
56 |
)
|
57 |
|
58 |
st.session_state.text_chunks = text_splitter.split_documents(documents)
|
|
|
82 |
|
83 |
llm = LlamaCpp(model_path= model_path,
|
84 |
n_ctx=4000,
|
85 |
+
max_tokens= 4000,
|
86 |
n_gpu_layers = 40,
|
|
|
87 |
callback_manager = callback_manager,
|
88 |
verbose=True)
|
89 |
|
|
|
108 |
|
109 |
rag_prompt_custom = PromptTemplate.from_template(prompt_template)
|
110 |
|
111 |
+
prompt = hub.pull("rlm/rag-prompt-mistral")
|
112 |
|
113 |
conversation_chain = RetrievalQA.from_chain_type(
|
114 |
llm,
|
|
|
157 |
st.image(image_path, use_column_width=True, output_format='auto')
|
158 |
|
159 |
|
160 |
+
# Delete directory content
|
161 |
+
def delete_file(directory_path):
|
162 |
|
163 |
# Check if the directory exists
|
164 |
if os.path.exists(directory_path) and len(os.listdir(directory_path)) > 0:
|
|
|
176 |
print(f"The directory {directory_path} does not exist.")
|
177 |
|
178 |
|
|
|
179 |
def save_uploaded_file(uploaded_file):
|
180 |
save_directory = "./documents/"
|
181 |
file_path = os.path.join(save_directory, uploaded_file.name)
|
|
|
200 |
|
201 |
def main():
|
202 |
load_dotenv()
|
203 |
+
st.set_page_config(page_title="Randstad Chad Bot",
|
204 |
page_icon=":books:")
|
205 |
st.write(css, unsafe_allow_html=True)
|
206 |
|
|
|
218 |
|
219 |
|
220 |
# Embedding Model
|
221 |
+
st.session_state.embeddings = FastEmbedEmbeddings( model_name= "BAAI/bge-base-en-v1.5",
|
222 |
cache_dir="./embedding_model/")
|
223 |
|
224 |
with st.sidebar:
|
|
|
240 |
if st.button("Process"):
|
241 |
|
242 |
# delete the old embeddings
|
243 |
+
delete_file(directory_path= './vectordb/')
|
244 |
+
|
245 |
+
# delete old documents
|
246 |
+
delete_file(directory_path="./documents/")
|
247 |
|
248 |
# then Embedd new documents
|
249 |
with st.spinner("Processing"):
|
|
|
253 |
for file in docs:
|
254 |
save_uploaded_file(file)
|
255 |
|
256 |
+
|
257 |
+
# using the helper function below lets load our dependencies
|
258 |
+
# Step 1 : Load the documents
|
259 |
+
# Step 2 : Break them into Chunks
|
260 |
+
# Step 3 : Create Embeddings and save them to Vector DB
|
261 |
+
# Step 4 : Get our conversation chain
|
262 |
+
|
263 |
load_dependencies()
|
264 |
|
265 |
# Load our model
|