Spaces:
Sleeping
Sleeping
AjiNiktech
committed on
Commit
•
c1d9e38
1
Parent(s):
5d778d9
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
import streamlit as st
|
2 |
-
from langchain_openai import ChatOpenAI
|
3 |
import os
|
4 |
import dotenv
|
|
|
5 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
6 |
from langchain_chroma import Chroma
|
|
|
7 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
8 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
9 |
from langchain_core.messages import HumanMessage, AIMessage
|
10 |
from langchain.memory import ConversationBufferMemory
|
11 |
from langchain.document_loaders import PyPDFLoader
|
12 |
-
from fuzzywuzzy import process
|
13 |
|
14 |
# Set page config
|
15 |
st.set_page_config(page_title="Tbank Assistant", layout="wide")
|
@@ -32,6 +33,7 @@ if "OPENAI_API_KEY" in os.environ:
|
|
32 |
dotenv.load_dotenv()
|
33 |
chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
|
34 |
|
|
|
35 |
loader1 = PyPDFLoader("Tbank resources.pdf")
|
36 |
loader2 = PyPDFLoader("International Banking Services.pdf")
|
37 |
data1 = loader1.load()
|
@@ -39,7 +41,6 @@ if "OPENAI_API_KEY" in os.environ:
|
|
39 |
data = data1 + data2
|
40 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
41 |
all_splits = text_splitter.split_documents(data)
|
42 |
-
|
43 |
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
|
44 |
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
|
45 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 6, "score_threshold": 0.5})
|
@@ -63,10 +64,11 @@ if "OPENAI_API_KEY" in os.environ:
|
|
63 |
12. Regularly refer to the provided PDFs for accurate, up-to-date information about Tbank's products and services.
|
64 |
13. Check for the basic Grammar and Spellings and understand if the spellings or grammar is slightly incorrect.
|
65 |
14. Understand the user query with different angle, analyze properly, check through the possible answers and then give the answer.
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
70 |
|
71 |
Your primary goal is to assist users with information directly related to Tbank, using only the website content and provided PDF documents. Avoid speculation and stick strictly to the provided information.
|
72 |
|
@@ -91,13 +93,14 @@ if "OPENAI_API_KEY" in os.environ:
|
|
91 |
|
92 |
document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
|
93 |
|
94 |
-
important_terms = ["Tbank", "Chairman", "CEO", "products", "services"]
|
95 |
|
96 |
return retriever, document_chain, important_terms
|
97 |
|
|
|
98 |
# Load components
|
99 |
with st.spinner("Initializing Tbank Assistant..."):
|
100 |
-
retriever, document_chain
|
101 |
|
102 |
# Initialize memory for each session
|
103 |
if "memory" not in st.session_state:
|
@@ -130,31 +133,31 @@ if "OPENAI_API_KEY" in os.environ:
|
|
130 |
|
131 |
with st.chat_message("assistant"):
|
132 |
message_placeholder = st.empty()
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
|
159 |
# Add assistant response to chat history
|
160 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
|
|
1 |
import streamlit as st
|
2 |
+
from langchain_openai import ChatOpenAI
|
3 |
import os
|
4 |
import dotenv
|
5 |
+
from langchain_community.document_loaders import WebBaseLoader
|
6 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
7 |
from langchain_chroma import Chroma
|
8 |
+
from langchain_openai import OpenAIEmbeddings
|
9 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
10 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
11 |
from langchain_core.messages import HumanMessage, AIMessage
|
12 |
from langchain.memory import ConversationBufferMemory
|
13 |
from langchain.document_loaders import PyPDFLoader
|
|
|
14 |
|
15 |
# Set page config
|
16 |
st.set_page_config(page_title="Tbank Assistant", layout="wide")
|
|
|
33 |
dotenv.load_dotenv()
|
34 |
chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
|
35 |
|
36 |
+
#loader1 = WebBaseLoader("https://www.tbankltd.com/")
|
37 |
loader1 = PyPDFLoader("Tbank resources.pdf")
|
38 |
loader2 = PyPDFLoader("International Banking Services.pdf")
|
39 |
data1 = loader1.load()
|
|
|
41 |
data = data1 + data2
|
42 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
43 |
all_splits = text_splitter.split_documents(data)
|
|
|
44 |
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
|
45 |
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
|
46 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 6, "score_threshold": 0.5})
|
|
|
64 |
12. Regularly refer to the provided PDFs for accurate, up-to-date information about Tbank's products and services.
|
65 |
13. Check for the basic Grammar and Spellings and understand if the spellings or grammar is slightly incorrect.
|
66 |
14. Understand the user query with different angle, analyze properly, check through the possible answers and then give the answer.
|
67 |
+
14. Be forgiving of minor spelling mistakes and grammatical errors in user queries. Try to understand the intent behind the question.
|
68 |
+
15. Maintain context from previous messages in the conversation. If a user asks about a person or topic mentioned earlier, refer back to that information.
|
69 |
+
16. If a user asks about a person using only a name or title, try to identify who they're referring to based on previous context or your knowledge base.
|
70 |
+
17. When answering questions about specific people, provide their full name and title if available.
|
71 |
+
|
72 |
|
73 |
Your primary goal is to assist users with information directly related to Tbank, using only the website content and provided PDF documents. Avoid speculation and stick strictly to the provided information.
|
74 |
|
|
|
93 |
|
94 |
document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
|
95 |
|
96 |
+
important_terms = ["Tbank", "Chairman", "CEO", "products", "services"]
|
97 |
|
98 |
return retriever, document_chain, important_terms
|
99 |
|
100 |
+
|
101 |
# Load components
|
102 |
with st.spinner("Initializing Tbank Assistant..."):
|
103 |
+
retriever, document_chain = initialize_components()
|
104 |
|
105 |
# Initialize memory for each session
|
106 |
if "memory" not in st.session_state:
|
|
|
133 |
|
134 |
with st.chat_message("assistant"):
|
135 |
message_placeholder = st.empty()
|
136 |
+
|
137 |
+
# Fuzzy match important terms
|
138 |
+
matched_term = fuzzy_match(prompt.lower(), important_terms)
|
139 |
+
if matched_term:
|
140 |
+
prompt = f"{prompt} (Matched term: {matched_term})"
|
141 |
+
|
142 |
+
|
143 |
+
# Retrieve relevant documents
|
144 |
+
docs = retriever.get_relevant_documents(prompt)
|
145 |
+
|
146 |
+
# Include previous messages for context
|
147 |
+
previous_messages = st.session_state.messages[-5:] # Last 5 messages
|
148 |
+
|
149 |
+
# Generate response
|
150 |
+
response = document_chain.invoke(
|
151 |
+
{
|
152 |
+
"context": docs,
|
153 |
+
"chat_history": st.session_state.memory.load_memory_variables({})["chat_history"],
|
154 |
+
"messages": [HumanMessage(content=msg["content"]) for msg in previous_messages] + [HumanMessage(content=prompt)],
|
155 |
+
}
|
156 |
+
)
|
157 |
+
|
158 |
+
# The response is already a string, so we can use it directly
|
159 |
+
full_response = response
|
160 |
+
message_placeholder.markdown(full_response)
|
161 |
|
162 |
# Add assistant response to chat history
|
163 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|