Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -61,14 +61,6 @@ avatar_12 = get_image_base64("avatar_12.png")
|
|
61 |
icon_base64 = get_image_base64("clipboard.png")
|
62 |
|
63 |
|
64 |
-
# Load your sentiment analysis data (replace 'your_data.csv' with your actual file)
|
65 |
-
df = pd.read_csv('./data/Consumer_sentiment_analysis_results.csv')
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
config = configparser.ConfigParser()
|
73 |
# Set page to wide mode
|
74 |
st.set_page_config(layout="wide")
|
@@ -90,9 +82,14 @@ google_sheet_url = os.getenv("Google_Sheet")
|
|
90 |
sheet = client.open_by_url(google_sheet_url)
|
91 |
worksheet = sheet.get_worksheet(0)
|
92 |
|
|
|
|
|
93 |
|
94 |
-
|
|
|
|
|
95 |
|
|
|
96 |
|
97 |
# Function to create a copy-to-clipboard button
|
98 |
def create_copy_button(text_to_copy):
|
@@ -125,16 +122,6 @@ def create_copy_button(text_to_copy):
|
|
125 |
return copy_js
|
126 |
|
127 |
|
128 |
-
|
129 |
-
# Retrieve the API key from the environment variables
|
130 |
-
api_key = os.getenv("OPENAI_API_KEY")
|
131 |
-
|
132 |
-
# Check if the API key is available, if not, raise an error
|
133 |
-
if api_key is None:
|
134 |
-
raise ValueError("API key not found. Ensure that the OPENAI_API_KEY environment variable is set.")
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
# Create a Chroma database instance using the selected directory
|
139 |
def create_chroma_instance(directory):
|
140 |
# Create and return a Chroma database instance
|
@@ -145,9 +132,7 @@ def create_chroma_instance(directory):
|
|
145 |
vectordb = Chroma()
|
146 |
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
# Define the system message template
|
151 |
system_template = """You are an AI assistant created by Citizens Information.
|
152 |
Most important rule: You have no knowledge other than the below context.
|
153 |
Only use the below context to answer questions. If you don't know the answer from the context, say that you don't know.
|
@@ -178,19 +163,13 @@ messages = [
|
|
178 |
qa_prompt = ChatPromptTemplate.from_messages(messages)
|
179 |
|
180 |
|
181 |
-
|
182 |
-
|
183 |
# Define the K Value
|
184 |
k_value = 6
|
185 |
|
186 |
# Define the search_type
|
187 |
selected_search_type = 'similarity'
|
188 |
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
chat_history = []
|
193 |
-
user_query = ""
|
194 |
|
195 |
answer = "" # Initialize ai_response with a default value
|
196 |
|
@@ -202,10 +181,6 @@ def ask_alans_ai(query, vectordb, chat_history, aoc_qa):
|
|
202 |
# Call aoc_qa with the filtered chat history
|
203 |
result = aoc_qa.invoke({"question": query, "chat_history": filtered_chat_history, "vectordb": vectordb})
|
204 |
|
205 |
-
# Create the filenames string using the result
|
206 |
-
# filenames = "["+"][".join(set([x.metadata['filepath'] for x in result['source_documents']]))+"]"
|
207 |
-
|
208 |
-
|
209 |
answer = result["answer"]
|
210 |
|
211 |
# Append the new query and its answer to the original chat history
|
@@ -216,18 +191,14 @@ def ask_alans_ai(query, vectordb, chat_history, aoc_qa):
|
|
216 |
def clear_input_box():
|
217 |
st.session_state["new_item"] = ""
|
218 |
|
219 |
-
|
220 |
-
|
221 |
# Clean and prepare data for appending
|
222 |
def clean_string(s):
|
223 |
return s.replace("\n", " ").replace("\t", " ")
|
224 |
|
225 |
|
226 |
-
|
227 |
-
# Streamlit app
|
228 |
def main():
|
229 |
|
230 |
-
|
231 |
st.markdown(
|
232 |
"""
|
233 |
<style>
|
@@ -247,9 +218,6 @@ def main():
|
|
247 |
if 'selected_model' not in st.session_state:
|
248 |
st.session_state['selected_model'] = 'gpt-3.5-turbo'
|
249 |
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
# Function to generate a unique session ID
|
254 |
def generate_session_id():
|
255 |
if 'session_id' not in st.session_state:
|
@@ -274,9 +242,7 @@ def main():
|
|
274 |
</style>
|
275 |
""", unsafe_allow_html=True)
|
276 |
|
277 |
-
|
278 |
-
|
279 |
-
# Sidebar
|
280 |
st.sidebar.title("About Citizens Information Chatbot")
|
281 |
st.sidebar.write("""**Health, Social Welfare, Employment, Money and Tax, Moving Country, Returning to Ireland, Housing, Education and Training, Travel and Recreation, Environment, Government in Ireland, Consumer, Death and Bereavement, Family and Relationships, Justice**
|
282 |
<br><br>
|
@@ -299,29 +265,21 @@ def main():
|
|
299 |
<br><br>
|
300 |
**Using this chatbot means you accept these terms. For more detailed advice, consult the <a href="https://www.citizensinformation.ie/" target="_blank">Citizens Information Website</a>**""", unsafe_allow_html=True)
|
301 |
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
|
308 |
-
# Create
|
309 |
st.sidebar.header("Select AI Temperature:")
|
310 |
ai_temp = st.sidebar.slider(label="Temperature", min_value=0.0, max_value=1.0, value=0.0, step=0.1)
|
311 |
|
312 |
|
313 |
-
|
314 |
# Streamlit slider for selecting the value of k
|
315 |
st.sidebar.header("Select a K Value for Retrieval:")
|
316 |
k_value = st.sidebar.slider('K Value', min_value=1, max_value=20, value=6)
|
317 |
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
# Initialize the selected model in session state
|
322 |
if 'selected_model' not in st.session_state:
|
323 |
st.session_state.selected_model = 'gpt-3.5-turbo'
|
324 |
|
|
|
325 |
st.sidebar.header("Select Large Language Model")
|
326 |
model_options = [
|
327 |
'gpt-3.5-turbo',
|
@@ -334,15 +292,10 @@ def main():
|
|
334 |
st.session_state['selected_model'] = selected_model
|
335 |
|
336 |
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
# Initialize the selected_directory in session state
|
342 |
if 'selected_directory' not in st.session_state:
|
343 |
st.session_state.selected_directory = './db_recursive_word'
|
344 |
|
345 |
-
|
346 |
st.sidebar.header("Select Chroma Database")
|
347 |
|
348 |
# Define the dropdown options and corresponding directories
|
@@ -354,22 +307,14 @@ def main():
|
|
354 |
}
|
355 |
|
356 |
|
357 |
-
|
358 |
# Sidebar dropdown to select the database, with ChromaDB1 (./data) as the default
|
359 |
selected_db = st.sidebar.selectbox("Select Chroma Database", db_options, index=0) # Default to first model
|
360 |
-
# Display the selected value in a message
|
361 |
-
# st.write(f"Selection made: {selected_db}")
|
362 |
|
363 |
# Get the corresponding directory for the selected option
|
364 |
selected_directory = db_options[selected_db]
|
365 |
-
|
366 |
-
# st.write(f"corresponding directory: {selected_directory}")
|
367 |
-
|
368 |
# Initialize Chroma instance
|
369 |
vectordb = create_chroma_instance(selected_directory)
|
370 |
-
# Display the selected directory in a message
|
371 |
-
# st.write(f"corresponding vector db: {vectordb}")
|
372 |
-
|
373 |
|
374 |
# Initialize the selected search type in session state
|
375 |
if 'selected_search_type' not in st.session_state:
|
@@ -388,24 +333,11 @@ def main():
|
|
388 |
selected_search_type = search_type_options.get(selected_search_type, "similarity")
|
389 |
|
390 |
|
391 |
-
# Display the selected search type in a message
|
392 |
-
# st.write(f"Selection made: {selected_search_type}")
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
# Display avatars side by side with selection buttons
|
402 |
st.sidebar.header("Select an Avatar:")
|
403 |
|
404 |
col1, col2, col3 = st.sidebar.columns(3)
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
# Initialize the selected avatar in session state
|
410 |
if 'user_selected_avatar' not in st.session_state:
|
411 |
st.session_state.user_selected_avatar = avatar_1
|
@@ -454,8 +386,7 @@ def main():
|
|
454 |
if st.button("Select 12"):
|
455 |
st.session_state.user_selected_avatar = avatar_12
|
456 |
|
457 |
-
|
458 |
-
|
459 |
aoc_qa = ConversationalRetrievalChain.from_llm(
|
460 |
ChatOpenAI(temperature=ai_temp, model_name=selected_model),
|
461 |
retriever=vectordb.as_retriever(search_kwargs={'k': k_value}, search_type=selected_search_type),
|
@@ -464,11 +395,6 @@ def main():
|
|
464 |
verbose=False,
|
465 |
combine_docs_chain_kwargs={"prompt": qa_prompt})
|
466 |
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
# HTML for social media links with base64-encoded images
|
473 |
social_media_html = f"""
|
474 |
<p>Find us on social media:</p>
|
@@ -489,7 +415,6 @@ def main():
|
|
489 |
# Add social media links to sidebar
|
490 |
st.sidebar.markdown(social_media_html, unsafe_allow_html=True)
|
491 |
|
492 |
-
|
493 |
|
494 |
st.markdown("""
|
495 |
<style>
|
@@ -532,9 +457,6 @@ def main():
|
|
532 |
</style>
|
533 |
""", unsafe_allow_html=True)
|
534 |
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
# Custom CSS to change the focus style of st.text_area
|
539 |
custom_css = """
|
540 |
<style>
|
@@ -549,8 +471,6 @@ def main():
|
|
549 |
# Inject custom CSS with markdown
|
550 |
st.markdown(custom_css, unsafe_allow_html=True)
|
551 |
|
552 |
-
|
553 |
-
|
554 |
# Get the current date and time
|
555 |
current_datetime = datetime.now()
|
556 |
|
@@ -664,7 +584,7 @@ def main():
|
|
664 |
|
665 |
|
666 |
|
667 |
-
|
668 |
with st.container():
|
669 |
# Display chat history
|
670 |
for question, answer in st.session_state.chat_history:
|
@@ -690,17 +610,15 @@ def main():
|
|
690 |
st.empty()
|
691 |
|
692 |
|
693 |
-
|
694 |
# Your combined string with the current date included
|
695 |
combined_string = f"Question: {message}\n\nAnswer: {answer}\n\nDate: {date_string}\n\nhttps://www.citizensinformation.ie/"
|
696 |
# Create a list with the three strings
|
697 |
|
698 |
-
|
699 |
message_clean = clean_string(message)
|
700 |
answer_clean = clean_string(answer)
|
701 |
date_string_clean = clean_string(date_string)
|
702 |
|
703 |
-
# Check length
|
704 |
max_length = 50000
|
705 |
message_clean = message_clean[:max_length]
|
706 |
answer_clean = answer_clean[:max_length]
|
@@ -709,9 +627,6 @@ def main():
|
|
709 |
# Append the cleaned data to the worksheet
|
710 |
data_to_append = [message_clean, answer_clean, date_string, str(ai_temp), st.session_state['session_id'], st.session_state['selected_model'], str(k_value), selected_directory, selected_search_type]
|
711 |
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
|
716 |
# Create and display the copy button only if answer has content
|
717 |
if answer:
|
@@ -721,16 +636,8 @@ def main():
|
|
721 |
# Input fields to Google Sheet
|
722 |
worksheet.append_row(data_to_append)
|
723 |
|
724 |
-
|
725 |
-
|
726 |
# Run the Streamlit app
|
727 |
if __name__ == "__main__":
|
728 |
main()
|
729 |
|
730 |
-
# print("system_template is:", system_template, end="\n")
|
731 |
-
# print("aoc_qa is:", aoc_qa, end="\n")
|
732 |
-
# print("messages is:", messages, end="\n")
|
733 |
-
# print("qa_prompt is:", qa_prompt, end="\n")
|
734 |
-
|
735 |
-
|
736 |
-
|
|
|
61 |
icon_base64 = get_image_base64("clipboard.png")
|
62 |
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
config = configparser.ConfigParser()
|
65 |
# Set page to wide mode
|
66 |
st.set_page_config(layout="wide")
|
|
|
82 |
sheet = client.open_by_url(google_sheet_url)
|
83 |
worksheet = sheet.get_worksheet(0)
|
84 |
|
85 |
+
# Retrieve the API key from the environment variables
|
86 |
+
api_key = os.getenv("OPENAI_API_KEY")
|
87 |
|
88 |
+
# Check if the API key is available, if not, raise an error
|
89 |
+
if api_key is None:
|
90 |
+
raise ValueError("API key not found. Ensure that the OPENAI_API_KEY environment variable is set.")
|
91 |
|
92 |
+
aoc_qa = None
|
93 |
|
94 |
# Function to create a copy-to-clipboard button
|
95 |
def create_copy_button(text_to_copy):
|
|
|
122 |
return copy_js
|
123 |
|
124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
# Create a Chroma database instance using the selected directory
|
126 |
def create_chroma_instance(directory):
|
127 |
# Create and return a Chroma database instance
|
|
|
132 |
vectordb = Chroma()
|
133 |
|
134 |
|
135 |
+
# Define the system message template (Prompt Template)
|
|
|
|
|
136 |
system_template = """You are an AI assistant created by Citizens Information.
|
137 |
Most important rule: You have no knowledge other than the below context.
|
138 |
Only use the below context to answer questions. If you don't know the answer from the context, say that you don't know.
|
|
|
163 |
qa_prompt = ChatPromptTemplate.from_messages(messages)
|
164 |
|
165 |
|
|
|
|
|
166 |
# Define the K Value
|
167 |
k_value = 6
|
168 |
|
169 |
# Define the search_type
|
170 |
selected_search_type = 'similarity'
|
171 |
|
|
|
|
|
|
|
172 |
chat_history = []
|
|
|
173 |
|
174 |
answer = "" # Initialize ai_response with a default value
|
175 |
|
|
|
181 |
# Call aoc_qa with the filtered chat history
|
182 |
result = aoc_qa.invoke({"question": query, "chat_history": filtered_chat_history, "vectordb": vectordb})
|
183 |
|
|
|
|
|
|
|
|
|
184 |
answer = result["answer"]
|
185 |
|
186 |
# Append the new query and its answer to the original chat history
|
|
|
191 |
def clear_input_box():
|
192 |
st.session_state["new_item"] = ""
|
193 |
|
|
|
|
|
194 |
# Clean and prepare data for appending
|
195 |
def clean_string(s):
|
196 |
return s.replace("\n", " ").replace("\t", " ")
|
197 |
|
198 |
|
199 |
+
###################### Streamlit app ####################################################
|
|
|
200 |
def main():
|
201 |
|
|
|
202 |
st.markdown(
|
203 |
"""
|
204 |
<style>
|
|
|
218 |
if 'selected_model' not in st.session_state:
|
219 |
st.session_state['selected_model'] = 'gpt-3.5-turbo'
|
220 |
|
|
|
|
|
|
|
221 |
# Function to generate a unique session ID
|
222 |
def generate_session_id():
|
223 |
if 'session_id' not in st.session_state:
|
|
|
242 |
</style>
|
243 |
""", unsafe_allow_html=True)
|
244 |
|
245 |
+
######## Sidebar ##############
|
|
|
|
|
246 |
st.sidebar.title("About Citizens Information Chatbot")
|
247 |
st.sidebar.write("""**Health, Social Welfare, Employment, Money and Tax, Moving Country, Returning to Ireland, Housing, Education and Training, Travel and Recreation, Environment, Government in Ireland, Consumer, Death and Bereavement, Family and Relationships, Justice**
|
248 |
<br><br>
|
|
|
265 |
<br><br>
|
266 |
**Using this chatbot means you accept these terms. For more detailed advice, consult the <a href="https://www.citizensinformation.ie/" target="_blank">Citizens Information Website</a>**""", unsafe_allow_html=True)
|
267 |
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
+
# Create an AI Temp slider widget in the sidebar
|
270 |
st.sidebar.header("Select AI Temperature:")
|
271 |
ai_temp = st.sidebar.slider(label="Temperature", min_value=0.0, max_value=1.0, value=0.0, step=0.1)
|
272 |
|
273 |
|
|
|
274 |
# Streamlit slider for selecting the value of k
|
275 |
st.sidebar.header("Select a K Value for Retrieval:")
|
276 |
k_value = st.sidebar.slider('K Value', min_value=1, max_value=20, value=6)
|
277 |
|
|
|
|
|
|
|
278 |
# Initialize the selected model in session state
|
279 |
if 'selected_model' not in st.session_state:
|
280 |
st.session_state.selected_model = 'gpt-3.5-turbo'
|
281 |
|
282 |
+
# Create an LLM dropdown select in the sidebar
|
283 |
st.sidebar.header("Select Large Language Model")
|
284 |
model_options = [
|
285 |
'gpt-3.5-turbo',
|
|
|
292 |
st.session_state['selected_model'] = selected_model
|
293 |
|
294 |
|
|
|
|
|
|
|
|
|
295 |
# Initialize the selected_directory in session state
|
296 |
if 'selected_directory' not in st.session_state:
|
297 |
st.session_state.selected_directory = './db_recursive_word'
|
298 |
|
|
|
299 |
st.sidebar.header("Select Chroma Database")
|
300 |
|
301 |
# Define the dropdown options and corresponding directories
|
|
|
307 |
}
|
308 |
|
309 |
|
|
|
310 |
# Sidebar dropdown to select the database, with ChromaDB1 (./data) as the default
|
311 |
selected_db = st.sidebar.selectbox("Select Chroma Database", db_options, index=0) # Default to first model
|
|
|
|
|
312 |
|
313 |
# Get the corresponding directory for the selected option
|
314 |
selected_directory = db_options[selected_db]
|
315 |
+
|
|
|
|
|
316 |
# Initialize Chroma instance
|
317 |
vectordb = create_chroma_instance(selected_directory)
|
|
|
|
|
|
|
318 |
|
319 |
# Initialize the selected search type in session state
|
320 |
if 'selected_search_type' not in st.session_state:
|
|
|
333 |
selected_search_type = search_type_options.get(selected_search_type, "similarity")
|
334 |
|
335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
# Display avatars side by side with selection buttons
|
337 |
st.sidebar.header("Select an Avatar:")
|
338 |
|
339 |
col1, col2, col3 = st.sidebar.columns(3)
|
340 |
|
|
|
|
|
|
|
341 |
# Initialize the selected avatar in session state
|
342 |
if 'user_selected_avatar' not in st.session_state:
|
343 |
st.session_state.user_selected_avatar = avatar_1
|
|
|
386 |
if st.button("Select 12"):
|
387 |
st.session_state.user_selected_avatar = avatar_12
|
388 |
|
389 |
+
############ Set up the LangChain Conversational Retrieval Chain ################
|
|
|
390 |
aoc_qa = ConversationalRetrievalChain.from_llm(
|
391 |
ChatOpenAI(temperature=ai_temp, model_name=selected_model),
|
392 |
retriever=vectordb.as_retriever(search_kwargs={'k': k_value}, search_type=selected_search_type),
|
|
|
395 |
verbose=False,
|
396 |
combine_docs_chain_kwargs={"prompt": qa_prompt})
|
397 |
|
|
|
|
|
|
|
|
|
|
|
398 |
# HTML for social media links with base64-encoded images
|
399 |
social_media_html = f"""
|
400 |
<p>Find us on social media:</p>
|
|
|
415 |
# Add social media links to sidebar
|
416 |
st.sidebar.markdown(social_media_html, unsafe_allow_html=True)
|
417 |
|
|
|
418 |
|
419 |
st.markdown("""
|
420 |
<style>
|
|
|
457 |
</style>
|
458 |
""", unsafe_allow_html=True)
|
459 |
|
|
|
|
|
|
|
460 |
# Custom CSS to change the focus style of st.text_area
|
461 |
custom_css = """
|
462 |
<style>
|
|
|
471 |
# Inject custom CSS with markdown
|
472 |
st.markdown(custom_css, unsafe_allow_html=True)
|
473 |
|
|
|
|
|
474 |
# Get the current date and time
|
475 |
current_datetime = datetime.now()
|
476 |
|
|
|
584 |
|
585 |
|
586 |
|
587 |
+
############# Container for chat messages ##############
|
588 |
with st.container():
|
589 |
# Display chat history
|
590 |
for question, answer in st.session_state.chat_history:
|
|
|
610 |
st.empty()
|
611 |
|
612 |
|
|
|
613 |
# Your combined string with the current date included
|
614 |
combined_string = f"Question: {message}\n\nAnswer: {answer}\n\nDate: {date_string}\n\nhttps://www.citizensinformation.ie/"
|
615 |
# Create a list with the three strings
|
616 |
|
|
|
617 |
message_clean = clean_string(message)
|
618 |
answer_clean = clean_string(answer)
|
619 |
date_string_clean = clean_string(date_string)
|
620 |
|
621 |
+
# Check length
|
622 |
max_length = 50000
|
623 |
message_clean = message_clean[:max_length]
|
624 |
answer_clean = answer_clean[:max_length]
|
|
|
627 |
# Append the cleaned data to the worksheet
|
628 |
data_to_append = [message_clean, answer_clean, date_string, str(ai_temp), st.session_state['session_id'], st.session_state['selected_model'], str(k_value), selected_directory, selected_search_type]
|
629 |
|
|
|
|
|
|
|
630 |
|
631 |
# Create and display the copy button only if answer has content
|
632 |
if answer:
|
|
|
636 |
# Input fields to Google Sheet
|
637 |
worksheet.append_row(data_to_append)
|
638 |
|
639 |
+
|
|
|
640 |
# Run the Streamlit app
|
641 |
if __name__ == "__main__":
|
642 |
main()
|
643 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|