Spaces:
Sleeping
Sleeping
Syed Junaid Iqbal
committed on
Commit
•
d77386f
1
Parent(s):
2db6c26
Update app.py
Browse files
app.py
CHANGED
@@ -51,8 +51,8 @@ def get_text_chunks(documents):
|
|
51 |
IMPORTANT : If the chunks too small we will miss the context and if its too large we will have longer compute time
|
52 |
"""
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
54 |
-
chunk_size=
|
55 |
-
chunk_overlap=
|
56 |
)
|
57 |
|
58 |
st.session_state.text_chunks = text_splitter.split_documents(documents)
|
@@ -82,9 +82,8 @@ def get_conversation_chain():
|
|
82 |
|
83 |
llm = LlamaCpp(model_path= model_path,
|
84 |
n_ctx=4000,
|
85 |
-
max_tokens=
|
86 |
n_gpu_layers = 40,
|
87 |
-
n_batch = 512,
|
88 |
callback_manager = callback_manager,
|
89 |
verbose=True)
|
90 |
|
@@ -109,7 +108,7 @@ def get_conversation_chain():
|
|
109 |
|
110 |
rag_prompt_custom = PromptTemplate.from_template(prompt_template)
|
111 |
|
112 |
-
prompt = hub.pull("rlm/rag-prompt")
|
113 |
|
114 |
conversation_chain = RetrievalQA.from_chain_type(
|
115 |
llm,
|
@@ -158,8 +157,8 @@ def add_rounded_edges(image_path="./randstad_featuredimage.png", radius=30):
|
|
158 |
st.image(image_path, use_column_width=True, output_format='auto')
|
159 |
|
160 |
|
161 |
-
# Delete
|
162 |
-
def
|
163 |
|
164 |
# Check if the directory exists
|
165 |
if os.path.exists(directory_path) and len(os.listdir(directory_path)) > 0:
|
@@ -177,7 +176,6 @@ def delete_db(directory_path = './vectordb/'):
|
|
177 |
print(f"The directory {directory_path} does not exist.")
|
178 |
|
179 |
|
180 |
-
|
181 |
def save_uploaded_file(uploaded_file):
|
182 |
save_directory = "./documents/"
|
183 |
file_path = os.path.join(save_directory, uploaded_file.name)
|
@@ -202,7 +200,7 @@ def load_dependencies():
|
|
202 |
|
203 |
def main():
|
204 |
load_dotenv()
|
205 |
-
st.set_page_config(page_title="
|
206 |
page_icon=":books:")
|
207 |
st.write(css, unsafe_allow_html=True)
|
208 |
|
@@ -220,7 +218,7 @@ def main():
|
|
220 |
|
221 |
|
222 |
# Embedding Model
|
223 |
-
st.session_state.embeddings = FastEmbedEmbeddings( model_name= "BAAI/bge-
|
224 |
cache_dir="./embedding_model/")
|
225 |
|
226 |
with st.sidebar:
|
@@ -242,7 +240,10 @@ def main():
|
|
242 |
if st.button("Process"):
|
243 |
|
244 |
# delete the old embeddings
|
245 |
-
|
|
|
|
|
|
|
246 |
|
247 |
# then Embedd new documents
|
248 |
with st.spinner("Processing"):
|
@@ -252,13 +253,13 @@ def main():
|
|
252 |
for file in docs:
|
253 |
save_uploaded_file(file)
|
254 |
|
255 |
-
|
256 |
-
using the helper function below lets load our dependencies
|
257 |
-
Step 1 : Load the documents
|
258 |
-
Step 2 : Break them into Chunks
|
259 |
-
Step 3 : Create Embeddings and save them to Vector DB
|
260 |
-
Step 4 : Get our conversation chain
|
261 |
-
|
262 |
load_dependencies()
|
263 |
|
264 |
# Load our model
|
|
|
51 |
IMPORTANT : If the chunks too small we will miss the context and if its too large we will have longer compute time
|
52 |
"""
|
53 |
text_splitter = RecursiveCharacterTextSplitter(
|
54 |
+
chunk_size= 400,
|
55 |
+
chunk_overlap=50,
|
56 |
)
|
57 |
|
58 |
st.session_state.text_chunks = text_splitter.split_documents(documents)
|
|
|
82 |
|
83 |
llm = LlamaCpp(model_path= model_path,
|
84 |
n_ctx=4000,
|
85 |
+
max_tokens= 4000,
|
86 |
n_gpu_layers = 40,
|
|
|
87 |
callback_manager = callback_manager,
|
88 |
verbose=True)
|
89 |
|
|
|
108 |
|
109 |
rag_prompt_custom = PromptTemplate.from_template(prompt_template)
|
110 |
|
111 |
+
prompt = hub.pull("rlm/rag-prompt-mistral")
|
112 |
|
113 |
conversation_chain = RetrievalQA.from_chain_type(
|
114 |
llm,
|
|
|
157 |
st.image(image_path, use_column_width=True, output_format='auto')
|
158 |
|
159 |
|
160 |
+
# Delete directory content
|
161 |
+
def delete_file(directory_path):
|
162 |
|
163 |
# Check if the directory exists
|
164 |
if os.path.exists(directory_path) and len(os.listdir(directory_path)) > 0:
|
|
|
176 |
print(f"The directory {directory_path} does not exist.")
|
177 |
|
178 |
|
|
|
179 |
def save_uploaded_file(uploaded_file):
|
180 |
save_directory = "./documents/"
|
181 |
file_path = os.path.join(save_directory, uploaded_file.name)
|
|
|
200 |
|
201 |
def main():
|
202 |
load_dotenv()
|
203 |
+
st.set_page_config(page_title="Randstad Chad Bot",
|
204 |
page_icon=":books:")
|
205 |
st.write(css, unsafe_allow_html=True)
|
206 |
|
|
|
218 |
|
219 |
|
220 |
# Embedding Model
|
221 |
+
st.session_state.embeddings = FastEmbedEmbeddings( model_name= "BAAI/bge-base-en-v1.5",
|
222 |
cache_dir="./embedding_model/")
|
223 |
|
224 |
with st.sidebar:
|
|
|
240 |
if st.button("Process"):
|
241 |
|
242 |
# delete the old embeddings
|
243 |
+
delete_file(directory_path= './vectordb/')
|
244 |
+
|
245 |
+
# delete old documents
|
246 |
+
delete_file(directory_path="./documents/")
|
247 |
|
248 |
# then Embedd new documents
|
249 |
with st.spinner("Processing"):
|
|
|
253 |
for file in docs:
|
254 |
save_uploaded_file(file)
|
255 |
|
256 |
+
|
257 |
+
# using the helper function below lets load our dependencies
|
258 |
+
# Step 1 : Load the documents
|
259 |
+
# Step 2 : Break them into Chunks
|
260 |
+
# Step 3 : Create Embeddings and save them to Vector DB
|
261 |
+
# Step 4 : Get our conversation chain
|
262 |
+
|
263 |
load_dependencies()
|
264 |
|
265 |
# Load our model
|