Syed Junaid Iqbal committed on
Commit
d77386f
1 Parent(s): 2db6c26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -51,8 +51,8 @@ def get_text_chunks(documents):
51
  IMPORTANT : If the chunks too small we will miss the context and if its too large we will have longer compute time
52
  """
53
  text_splitter = RecursiveCharacterTextSplitter(
54
- chunk_size=600,
55
- chunk_overlap=100,
56
  )
57
 
58
  st.session_state.text_chunks = text_splitter.split_documents(documents)
@@ -82,9 +82,8 @@ def get_conversation_chain():
82
 
83
  llm = LlamaCpp(model_path= model_path,
84
  n_ctx=4000,
85
- max_tokens= 200,
86
  n_gpu_layers = 40,
87
- n_batch = 512,
88
  callback_manager = callback_manager,
89
  verbose=True)
90
 
@@ -109,7 +108,7 @@ def get_conversation_chain():
109
 
110
  rag_prompt_custom = PromptTemplate.from_template(prompt_template)
111
 
112
- prompt = hub.pull("rlm/rag-prompt")
113
 
114
  conversation_chain = RetrievalQA.from_chain_type(
115
  llm,
@@ -158,8 +157,8 @@ def add_rounded_edges(image_path="./randstad_featuredimage.png", radius=30):
158
  st.image(image_path, use_column_width=True, output_format='auto')
159
 
160
 
161
- # Delete our vector DB
162
- def delete_db(directory_path = './vectordb/'):
163
 
164
  # Check if the directory exists
165
  if os.path.exists(directory_path) and len(os.listdir(directory_path)) > 0:
@@ -177,7 +176,6 @@ def delete_db(directory_path = './vectordb/'):
177
  print(f"The directory {directory_path} does not exist.")
178
 
179
 
180
-
181
  def save_uploaded_file(uploaded_file):
182
  save_directory = "./documents/"
183
  file_path = os.path.join(save_directory, uploaded_file.name)
@@ -202,7 +200,7 @@ def load_dependencies():
202
 
203
  def main():
204
  load_dotenv()
205
- st.set_page_config(page_title="Chat with multiple Files",
206
  page_icon=":books:")
207
  st.write(css, unsafe_allow_html=True)
208
 
@@ -220,7 +218,7 @@ def main():
220
 
221
 
222
  # Embedding Model
223
- st.session_state.embeddings = FastEmbedEmbeddings( model_name= "BAAI/bge-small-en-v1.5",
224
  cache_dir="./embedding_model/")
225
 
226
  with st.sidebar:
@@ -242,7 +240,10 @@ def main():
242
  if st.button("Process"):
243
 
244
  # delete the old embeddings
245
- delete_db()
 
 
 
246
 
247
  # then Embedd new documents
248
  with st.spinner("Processing"):
@@ -252,13 +253,13 @@ def main():
252
  for file in docs:
253
  save_uploaded_file(file)
254
 
255
- """
256
- using the helper function below lets load our dependencies
257
- Step 1 : Load the documents
258
- Step 2 : Break them into Chunks
259
- Step 3 : Create Embeddings and save them to Vector DB
260
- Step 4 : Get our conversation chain
261
- """
262
  load_dependencies()
263
 
264
  # Load our model
 
51
  IMPORTANT : If the chunks too small we will miss the context and if its too large we will have longer compute time
52
  """
53
  text_splitter = RecursiveCharacterTextSplitter(
54
+ chunk_size= 400,
55
+ chunk_overlap=50,
56
  )
57
 
58
  st.session_state.text_chunks = text_splitter.split_documents(documents)
 
82
 
83
  llm = LlamaCpp(model_path= model_path,
84
  n_ctx=4000,
85
+ max_tokens= 4000,
86
  n_gpu_layers = 40,
 
87
  callback_manager = callback_manager,
88
  verbose=True)
89
 
 
108
 
109
  rag_prompt_custom = PromptTemplate.from_template(prompt_template)
110
 
111
+ prompt = hub.pull("rlm/rag-prompt-mistral")
112
 
113
  conversation_chain = RetrievalQA.from_chain_type(
114
  llm,
 
157
  st.image(image_path, use_column_width=True, output_format='auto')
158
 
159
 
160
+ # Delete directory content
161
+ def delete_file(directory_path):
162
 
163
  # Check if the directory exists
164
  if os.path.exists(directory_path) and len(os.listdir(directory_path)) > 0:
 
176
  print(f"The directory {directory_path} does not exist.")
177
 
178
 
 
179
  def save_uploaded_file(uploaded_file):
180
  save_directory = "./documents/"
181
  file_path = os.path.join(save_directory, uploaded_file.name)
 
200
 
201
  def main():
202
  load_dotenv()
203
+ st.set_page_config(page_title="Randstad Chad Bot",
204
  page_icon=":books:")
205
  st.write(css, unsafe_allow_html=True)
206
 
 
218
 
219
 
220
  # Embedding Model
221
+ st.session_state.embeddings = FastEmbedEmbeddings( model_name= "BAAI/bge-base-en-v1.5",
222
  cache_dir="./embedding_model/")
223
 
224
  with st.sidebar:
 
240
  if st.button("Process"):
241
 
242
  # delete the old embeddings
243
+ delete_file(directory_path= './vectordb/')
244
+
245
+ # delete old documents
246
+ delete_file(directory_path="./documents/")
247
 
248
  # then Embedd new documents
249
  with st.spinner("Processing"):
 
253
  for file in docs:
254
  save_uploaded_file(file)
255
 
256
+
257
+ # using the helper function below lets load our dependencies
258
+ # Step 1 : Load the documents
259
+ # Step 2 : Break them into Chunks
260
+ # Step 3 : Create Embeddings and save them to Vector DB
261
+ # Step 4 : Get our conversation chain
262
+
263
  load_dependencies()
264
 
265
  # Load our model