Spaces:
Runtime error
Runtime error
Upload chatbot_app_poc.py
Browse files- chatbot_app_poc.py +380 -0
chatbot_app_poc.py
ADDED
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import datetime
|
3 |
+
import numpy as np
|
4 |
+
import gspread
|
5 |
+
from google.oauth2 import service_account
|
6 |
+
|
7 |
+
# Helper: build an authorized Google Sheets client from a service-account key.
def create_google_sheets_service(json_credentials_path, scopes):
    """Return a gspread client authorized via a service-account JSON file.

    Parameters
    ----------
    json_credentials_path : str
        Path to the service-account credentials JSON key file.
    scopes : list[str]
        OAuth scopes to attach to the credentials.
    """
    credentials = service_account.Credentials.from_service_account_file(
        json_credentials_path
    ).with_scopes(scopes)
    return gspread.authorize(credentials)
|
11 |
+
|
12 |
+
|
13 |
+
from datetime import datetime
|
14 |
+
import pytz
|
15 |
+
import requests
|
16 |
+
|
17 |
+
def get_user_ip():
    """Return the caller's public IP address as a string.

    Queries the ipify web service. Returns the string "Unknown" on any
    network, HTTP, or parsing failure so callers never see an exception.
    """
    try:
        # Timeout added: without one, an unreachable service would hang the
        # Streamlit request indefinitely.
        response = requests.get("https://api.ipify.org?format=json", timeout=5)
        response.raise_for_status()
        ip = response.json()['ip']
    except (requests.RequestException, KeyError, ValueError):
        # Narrowed from a bare `except:`, which also swallowed programming
        # errors (NameError, KeyboardInterrupt, ...). Same fallback value.
        ip = "Unknown"
    return ip
|
24 |
+
|
25 |
+
|
26 |
+
from google.api_core.retry import Retry
|
27 |
+
from google.api_core import retry
|
28 |
+
|
29 |
+
|
30 |
+
def write_data_to_google_sheet(service, spreadsheet_url, sheet_name, data):
    """Append conversation records to a Google Sheet.

    Parameters
    ----------
    service : gspread.Client
        Authorized client from create_google_sheets_service().
    spreadsheet_url : str
        Full URL of the target spreadsheet.
    sheet_name : str
        Worksheet title inside the spreadsheet.
    data : list[dict]
        Items with 'query' and 'response' keys; a Saudi-local timestamp and
        the caller's public IP are appended to each row.
    """
    sheet = service.open_by_url(spreadsheet_url).worksheet(sheet_name)

    # Fetch existing rows ONCE and reuse the result: the original rewrote
    # the header on every call (4 extra API calls against the quota) and
    # fetched all values separately just to count rows.
    existing = sheet.get_all_values()

    header_row = ["Questions", "Answers", "Timestamp", "User IP"]
    if not existing or existing[0] != header_row:
        for col, header in enumerate(header_row, start=1):
            sheet.update_cell(1, col, header)
        existing = sheet.get_all_values()

    # Timestamps are recorded in Saudi Arabia local time.
    saudi_timezone = pytz.timezone("Asia/Riyadh")

    # One IP lookup per call, shared by all rows (as before).
    user_ip = get_user_ip()

    # First empty row after the current contents.
    next_row = len(existing) + 1

    for row, item in enumerate(data, start=next_row):
        saudi_time = datetime.now(saudi_timezone).strftime("%Y-%m-%d %H:%M:%S")
        sheet.update_cell(row, 1, item['query'])
        sheet.update_cell(row, 2, item['response'])
        sheet.update_cell(row, 3, saudi_time)
        sheet.update_cell(row, 4, user_ip)
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
# Google Sheets logging configuration. NOTE(review): this runs at import
# time and immediately authorizes against Google — the app fails to start
# if credentials.json is missing; consider lazy initialization.
json_credentials_path = 'credentials.json'  # Path to the service-account JSON key file.
scopes = ['https://www.googleapis.com/auth/spreadsheets']

service = create_google_sheets_service(json_credentials_path, scopes)
spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1R1AUf0Bzk5fLTpV6vk023DW7FV19kBT3e1lPWysDW2Q/edit#gid=1555077198'
sheet_name = 'Sheet1'  # Worksheet that receives the conversation log.
|
68 |
+
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
+
# Environment Vars
import os

# BUG FIX: the original first executed
#     os.environ["OPENAI_API_KEY"] = openai_api_key
# where `openai_api_key` was never defined, raising NameError at import
# time and crashing the whole app. The key is taken from Streamlit
# secrets, exactly as the (previously unreachable) second assignment did.
os.environ['OPENAI_API_KEY'] = st.secrets['OPENAI_API_KEY']
|
81 |
+
|
82 |
+
|
83 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
84 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
85 |
+
from langchain.vectorstores.faiss import FAISS
|
86 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
87 |
+
from langchain.text_splitter import CharacterTextSplitter
|
88 |
+
from langchain import OpenAI, VectorDBQA
|
89 |
+
from langdetect import detect
|
90 |
+
from googletrans import Translator
|
91 |
+
from langchain.vectorstores import Chroma
|
92 |
+
from langchain.document_loaders import PyPDFLoader
|
93 |
+
from langchain.chains import RetrievalQA
|
94 |
+
from langchain.llms import OpenAI
|
95 |
+
from langchain.document_loaders import TextLoader
|
96 |
+
#from langchain.translator import OpenAITranslator
|
97 |
+
import openai
|
98 |
+
from datetime import datetime
|
99 |
+
import pandas as pd
|
100 |
+
import pytz
|
101 |
+
|
102 |
+
|
103 |
+
import streamlit as st
|
104 |
+
from hashlib import sha256
|
105 |
+
|
106 |
+
def create_hashed_password(password):
    """Return the SHA-256 hex digest of *password* (UTF-8 encoded)."""
    digest = sha256(password.encode('utf-8'))
    return digest.hexdigest()
|
108 |
+
|
109 |
+
def login():
    """Render a username/password form and return True once authenticated.

    On a successful match sets st.session_state["authentication_status"]
    to True and st.session_state["name"] to the matched display name;
    otherwise sets the status to False and shows an error/warning.
    """
    st.title('Please Login')

    entered_username = st.text_input('Username')
    entered_password = st.text_input('Password', type='password')

    if st.button('Login'):
        # SECURITY NOTE(review): credentials are hard-coded in plaintext;
        # hashing them here adds no protection since the plaintext is in
        # the source. Move real credentials into st.secrets.
        names = ['User', 'Customer']
        usernames = ['warba', 'Warba']
        passwords = ['warba123', 'warba123']

        hashed_passwords = [create_hashed_password(password) for password in passwords]

        for name, username, hashed_password in zip(names, usernames, hashed_passwords):
            if username == entered_username and hashed_password == create_hashed_password(entered_password):
                st.session_state["authentication_status"] = True
                st.session_state["name"] = name
                break
        else:
            # for/else: no credential pair matched.
            st.session_state["authentication_status"] = False

    # BUG FIX: read the status once with .get(). Before the first click the
    # key does not exist, and the original's direct indexing
    # (st.session_state["authentication_status"]) raised on first render.
    status = st.session_state.get("authentication_status", None)
    if status:
        return True
    elif status == False:
        st.error('Sorry, wrong login credentials')
        return False
    else:
        st.warning('Please enter your username and password')
        return False
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
144 |
+
#texts = text_splitter.split_text(state_of_the_union)
|
145 |
+
#loader = PyPDFLoader("warba_5_6.pdf")
|
146 |
+
#documents = loader.load()
|
147 |
+
#texts = text_splitter.split_documents(documents)
|
148 |
+
|
149 |
+
#################
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts.prompt import PromptTemplate
from langchain.callbacks import get_openai_callback

# System prompt for the retrieval chain: restricts answers to the
# retrieved context and forbids fabricated answers or off-topic replies.
qa_template = """
You are a helpful AI assistant named Q&A bot developed and created by Warba Bank Developers. The user gives you a file its content is represented by the following pieces of context, use them to answer the question at the end.
If you don't know the answer, just say you don't know. Do NOT try to make up an answer.
If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
Use as much detail as possible when responding.

context: {context}
=========
question: {question}
======
"""
QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["context","question" ])

# Load the Q&A knowledge base: one document per CSV row.
loader = CSVLoader(file_path="Warba_QA_bot_full_dataset_June_14_csv_updated.csv", encoding="utf-8",csv_args={'delimiter': ',',})
data = loader.load()
# NOTE(review): text_splitter is built but never applied — the raw CSV
# documents are embedded directly below; confirm whether splitting was
# intended.
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000,chunk_overlap = 0,length_function = len,)
embeddings = OpenAIEmbeddings()
# Build the FAISS index at import time (embeds every row — costs API calls
# on each app start).
vectors = FAISS.from_documents(data, embeddings)
# Conversational retrieval chain used by run_chain(); temperature 0 for
# deterministic answers.
chain = ConversationalRetrievalChain.from_llm(llm = ChatOpenAI(temperature=0.0,model_name='gpt-3.5-turbo', openai_api_key=st.secrets['OPENAI_API_KEY']),
        retriever=vectors.as_retriever(),max_tokens_limit=4097,combine_docs_chain_kwargs={"prompt": QA_PROMPT})
|
177 |
+
#faissIndex = FAISS.from_documents(docs, OpenAIEmbeddings())
|
178 |
+
#faissIndex.save_local("faiss_warba_docs")
|
179 |
+
#from langchain.chains import RetrievalQA
|
180 |
+
#from langchain.chat_models import ChatOpenAI
|
181 |
+
#chatbot = RetrievalQA.from_chain_type(llm=ChatOpenAI(openai_api_key=st.secrets['OPENAI_API_KEY'],temperature=0, model_name="gpt-3.5-turbo", max_tokens=256), chain_type="stuff", retriever=FAISS.load_local("faiss_warba_docs", OpenAIEmbeddings()).as_retriever(search_type="similarity", search_kwargs={"k":1}))
|
182 |
+
|
183 |
+
|
184 |
+
###embeddings = OpenAIEmbeddings()
|
185 |
+
###text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
186 |
+
###texts = text_splitter.split_text(state_of_the_union)
|
187 |
+
###vectorstore = FAISS.from_texts(texts, embeddings)
|
188 |
+
|
189 |
+
#import numpy as np
|
190 |
+
|
191 |
+
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
192 |
+
#texts = text_splitter.split_text(state_of_the_union)
|
193 |
+
|
194 |
+
#embeddings = OpenAIEmbeddings()
|
195 |
+
#vectorstore = FAISS.from_texts(texts, embeddings)
|
196 |
+
|
197 |
+
|
198 |
+
#db = Chroma.from_documents(texts, embeddings)
|
199 |
+
#retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})
|
200 |
+
#llm = OpenAI(model_name='gpt-3.5-turbo',temperature=0, max_tokens=256 )
|
201 |
+
#qa = VectorDBQA.from_chain_type(llm, chain_type="stuff", vectorstore=vectorstore)
|
202 |
+
from langchain.chat_models import ChatOpenAI
|
203 |
+
#qa = VectorDBQA.from_chain_type(llm=OpenAI(model_name='gpt-3.5-turbo',temperature=0.2,max_tokens=256), chain_type="stuff", vectorstore=vectorstore)
|
204 |
+
#qa = VectorDBQA.from_chain_type(llm=ChatOpenAI(model_name='gpt-3.5-turbo',temperature=0.2,max_tokens=256), chain_type="stuff", vectorstore=vectorstore)
|
205 |
+
#qa = RetrievalQA.from_chain_type(llm=OpenAI(model_name='gpt-3.5-turbo'), chain_type="stuff", retriever=retriever, return_source_documents=True)
|
206 |
+
from langchain.chains import load_chain
|
207 |
+
|
208 |
+
#translator = OpenAITranslator()
|
209 |
+
from googletrans import Translator
|
210 |
+
|
211 |
+
#chain = load_chain("lc://chains/vector-db-qa/stuff/chain.json", vectorstore=vectorstore)
|
212 |
+
#from langchain.chains.question_answering import load_qa_chain
|
213 |
+
#chain = load_qa_chain(llm=OpenAI(model_name='gpt-3.5-turbo'), chain_type="stuff")
|
214 |
+
|
215 |
+
|
216 |
+
from googletrans import Translator
|
217 |
+
|
218 |
+
def translate_to_arabic(text):
    """Translate *text* into Arabic via googletrans; return the translated string."""
    result = Translator().translate(text, dest='ar')
    return result.text


# Module-level translator instance kept for compatibility (note that
# translate_to_arabic constructs its own client per call).
translator = Translator()
|
226 |
+
|
227 |
+
|
228 |
+
from langdetect import detect
|
229 |
+
|
230 |
+
|
231 |
+
import time
|
232 |
+
import streamlit as st
|
233 |
+
from datetime import datetime
|
234 |
+
import pytz
|
235 |
+
|
236 |
+
#def run_chain(query):
|
237 |
+
#return chain.run(query)
|
238 |
+
|
239 |
+
def run_chain(chat_history, question):
    """Run the module-level ConversationalRetrievalChain on one question.

    *chat_history* is forwarded as-is; callers in this file pass "" so the
    chain sees no prior turns.
    """
    payload = {'chat_history': chat_history, 'question': question}
    return chain.run(payload)
|
241 |
+
|
242 |
+
|
243 |
+
def clear_conversation():
    """Reset the chat history on request, or initialize it on first run."""
    clear_pressed = st.button("🧹 Clear conversation", use_container_width=True)
    if clear_pressed or "history" not in st.session_state:
        st.session_state.history = []
|
249 |
+
|
250 |
+
def download_conversation():
    """Offer the current chat history as a downloadable CSV file."""
    history_frame = pd.DataFrame(
        st.session_state.history, columns=["timestamp", "query", "response"]
    )
    csv_payload = history_frame.to_csv(index=False)

    # File name carries a minute-resolution timestamp so repeated downloads
    # don't collide.
    st.download_button(
        label="💾 Download conversation",
        data=csv_payload,
        file_name=f"conversation_{datetime.now().strftime('%Y%m%d%H%M')}.csv",
        mime="text/csv",
        use_container_width=True,
    )
|
263 |
+
|
264 |
+
def app():
    """Render the main Q&A chat page.

    Layout: injected CSS, title banner, sidebar (history toggle,
    multi-line toggle, clear/download controls), the question input with
    an "Ask" button, a cosmetic progress bar, the answer block,
    Google-Sheets logging of each exchange, and an optional
    conversation-history section.
    """
    st.set_page_config(page_title="Q&A Bot", page_icon=":guardsman:")

    # Page-wide CSS; #151f6d is the Warba brand colour used for buttons
    # and the answer block.
    st.markdown("""
    <style>
    body {
        background-color: #f0f2f6;
    }
    .title {
        font-size: 25px;
        font-weight: bold;
        color: #151f6d;
        text-align: center;
    }
    .response-block {
        background-color: #151f6d;
        padding: 10px;
        color: white;
        border-radius: 5px;
        margin-top: 10px;
        text-align: center;
        font-size: 16px; # Increase font size by one degree
    }
    .stTextInput>div>div>input {
        background-color: white;
    }
    .stButton>button {
        width: 100%;
        color: white;
        background-color: #151f6d;
    }
    </style>
    """, unsafe_allow_html=True)

    st.markdown('<div class="title">Questions and Answers Bot for Warba Bank.</div>', unsafe_allow_html=True)

    st.write("")  # Empty line for spacing
    st.write("")  # Empty line for spacing

    sidebar = st.sidebar
    show_history = sidebar.checkbox("Show conversation history", value=False)

    # Checkbox for multi-line input in the sidebar.
    multiline = sidebar.checkbox('Use multi-line input')

    with sidebar.expander("More options"):
        clear_conversation()   # also initializes st.session_state.history on first run
        download_conversation()

    col1, col2 = st.columns([3,1])
    with col1:
        # Single-line vs multi-line input, depending on the sidebar toggle.
        if multiline:
            query = st.text_area("Enter a question and get an answer from Q&A Bot:")
        else:
            query = st.text_input("Enter a question and get an answer from Q&A Bot:")

    thinking_message_text = col1.empty()  # Placeholder for the 'Thinking...' text
    thinking_message_bar = col1.empty()   # Placeholder for the progress bar

    response_block = col1.empty()  # Placeholder for the response block

    with col2:
        st.write("")  # Empty line for spacing
        st.write("")  # Empty line for spacing
        if st.button("Ask"):
            if query:
                # Cosmetic ~1 s progress animation shown BEFORE the chain
                # runs; it does not reflect actual progress.
                progress_bar = thinking_message_bar.progress(0)
                for i in range(100):
                    # Update the progress bar with each iteration.
                    time.sleep(0.01)  # add delay for demonstration
                    progress_bar.progress(i + 1)
                    thinking_message_text.markdown(f'Thinking... {i+1}%', unsafe_allow_html=True)

                # Timestamp in Saudi local time for the history/log entry.
                sa_time = datetime.now(pytz.timezone('Asia/Riyadh'))
                timestamp = sa_time.strftime('%Y-%m-%d %H:%M:%S')
                #response = run_chain(query)
                # NOTE(review): chat_history is always passed empty, so the
                # chain never sees prior turns — confirm this is intended.
                response = run_chain("", query)

                # Clear the progress bar and the 'Thinking...' text.
                thinking_message_bar.empty()
                thinking_message_text.empty()

                # Display the response.
                response_block.markdown(f'<div class="response-block"> Answer: {response}</div>', unsafe_allow_html=True)
                conversation_item = {
                    'timestamp': timestamp,
                    'query': query,
                    'response': response
                }
                st.session_state.history.append(conversation_item)

                # Log the exchange to the Google Sheet (blocking network call).
                write_data_to_google_sheet(service, spreadsheet_url, sheet_name, [conversation_item])

    # Conversation history is only rendered when the sidebar toggle is on,
    # newest exchange first.
    if show_history:
        st.write('\n\n## Conversation history')
        for item in reversed(st.session_state.history):
            st.write(f'### Question: {item["query"]}')
            st.write(f'### Answer: {item["response"]}')
            st.write('---')
|
369 |
+
|
370 |
+
|
371 |
+
if __name__ == "__main__":
    #st.set_page_config(page_title="My Streamlit App")
    # Gate the app behind the login form. On a successful login the script
    # is rerun so the main UI replaces the form on the next pass.
    if 'authentication_status' not in st.session_state or st.session_state["authentication_status"] == False:
        login_successful = login()
        if login_successful:
            st.experimental_rerun()
    else:
        app()
|
379 |
+
|
380 |
+
|