truevis committed
Commit fe0b5ee
1 Parent(s): 5fe11e1
Files changed (5)
  1. .streamlit/config.toml +6 -0
  2. a-icon.svg +1 -0
  3. app.py +266 -0
  4. property-icon.ico +0 -0
  5. q-icon.svg +1 -0
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
+ [theme]
+ base = "light"
+ [client]
+ showSidebarNavigation = false
+
+
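app.py (added below) reads its credentials from st.secrets, so this config.toml is only half of the configuration: the secret values themselves have to be supplied separately (for a local run, via .streamlit/secrets.toml, which is deliberately not committed). A minimal sketch of the entries app.py looks up; the key names come from the code, the values here are placeholders:

    # .streamlit/secrets.toml (illustrative only, not part of this commit)
    GROQ_API_KEY = "..."
    PINECONE_API_KEY = "..."
    PINECONE_INDEX_NAME = "..."
    STREAMLIT_API_KEY = "..."
    password = "..."

app.py also calls openai.embeddings.create() without passing a key, so presumably an OPENAI_API_KEY is expected in the environment as well.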
a-icon.svg ADDED
app.py ADDED
@@ -0,0 +1,266 @@
+ import streamlit as st
+ from langchain_groq import ChatGroq
+ from langchain_core.prompts import ChatPromptTemplate
+ import pinecone
+ import hmac
+ import tiktoken
+ import re
+ import openai
+ # import firebase_stripe_auth
+
+ EMBEDDINGS_MODEL = 'text-embedding-3-small'
+ groq_api_key = st.secrets["GROQ_API_KEY"]
+ pinecone_api_key = st.secrets["PINECONE_API_KEY"]
+ pinecone_index_name = st.secrets["PINECONE_INDEX_NAME"]
+ streamlit_api_key = st.secrets["STREAMLIT_API_KEY"]
+
+
+ system_prompt = """<system_prompt> You are an AI assistant with expertise in U.S. Department of Housing and Urban Development (HUD) programs and policies. Provide accurate, helpful information to assist people working with HUD and its clients in the United States. <knowledge> Understand HUD's major program offices: - Community Planning and Development (CDBG, HOME, Shelter Plus Care, ESG, Mod Rehab SRO, HOPWA) - Housing (FHA, Fannie Mae/Freddie Mac, Section 202, Section 811, Project-Based Section 8) - Public and Indian Housing (Public housing, HOPE VI, Housing Choice Vouchers, Native American/Hawaiian/Alaskan housing grants) - Fair Housing and Equal Opportunity - Policy Development and Research - Ginnie Mae, Healthy Homes and Lead Hazard Control, Partnership for Advancing Technology in Housing </knowledge> <communication> - Assistant answers ONLY in the context of U.S. Department of Housing and Urban Development (HUD) expertise and issues within the United States - Explain HUD programs clearly and accurately - Provide relevant details and suggest resources - Maintain professional, respectful, and empathetic communication - Use a neutral and professional tone in all responses - List each URL verbatim as it appears after "Source: ". Example: If the data contain "Source: https://docmind.ai/hud/pages/CFR-2018-title24-vol4-part982_page_45.pdf", respond with "\nSource 1: https://docmind.ai/hud/pages/CFR-2018-title24-vol4-part982_page_45.pdf" and so on. </communication> <task_completion> - Answer questions about HUD programs and policies in the United States - Determine client eligibility for assistance in the United States - Guide applications and participation in HUD programs within the United States - Troubleshoot HUD-related housing issues in the United States - Explain tenant and landlord rights and responsibilities under HUD in the United States </task_completion> Be a knowledgeable, helpful resource referencing the <context> about <query> specifically for HUD issues in the United States. </system_prompt>"""
+
+ questions = [
+     "What are the eligibility requirements for the Housing Choice Voucher Program (Section 8), and how can I assist a client in applying for this program?",
+     "My client is facing discrimination in his HUD-assisted housing based on his race. What steps can he take to file a complaint with the Office of Fair Housing and Equal Opportunity, and what support can HUD provide?",
+     "Can you explain the differences between the public housing program and the Section 202 Supportive Housing for the Elderly program, and help me determine which one might be a better fit for my elderly client?",
+     "I'm working with a community organization that wants to apply for a Community Development Block Grant (CDBG) to revitalize a low-income neighborhood. What are some key things we should know about the application process and how the funds can be used?",
+     "List the PDF source pages and their URLS for the previous chat response from Assistant. List the full exact URLs from the source data.",
+     "summarize the previous chat response from Assistant",
+     "generate a draft email focusing directly on the subject or main question. Omit any initial greetings or pleasantries, such as 'I hope this message finds you well.'"
+ ]
+
+ def check_password():
+     """Returns `True` if the user had the correct password."""
+
+     def password_entered():
+         """Checks whether a password entered by the user is correct."""
+         if hmac.compare_digest(st.session_state["password"], st.secrets["password"]):
+             st.session_state["password_correct"] = True
+             del st.session_state["password"]  # Don't store the password.
+         else:
+             st.session_state["password_correct"] = False
+
+     # Return True if the password is validated.
+     if st.session_state.get("password_correct", False):
+         return True
+
+     # Show input for password.
+     st.text_input(
+         "Password for HUD Assistant", type="password", on_change=password_entered, key="password"
+     )
+     if "password_correct" in st.session_state:
+         st.error("😕 Password incorrect")
+     return False
+
+
+ st.set_page_config(
+     page_title="HUD Assistant",
+     page_icon="property-icon.ico",
+     layout="wide",
+     initial_sidebar_state="expanded"
+ )
+
+
+ if not check_password():
+     st.stop()  # Do not continue if check_password is not True.
+
+
+ def initialize_pinecone(api_key, index_name):
+     pinecone_client = pinecone.Pinecone(api_key=api_key)
+     return pinecone_client.Index(index_name)
+
+ def query_pinecone(index, embeddings, top_k):
+     return index.query(vector=embeddings, top_k=top_k, include_metadata=True)
+
+ def format_query_results(query_results):
+     formatted_results = []
+     for data in query_results["matches"]:
+         document_details = {"Document ID": data['id'], "Page": data['metadata'].get('page', 'No page metadata found'), "Enhancement": data['metadata'].get('enh', 'No enhancement metadata found'), "Score": data['score'], "Text Metadata": data['metadata'].get('text', 'No text metadata found')}
+         formatted_results.append(document_details)
+     return formatted_results
+
+ def generate_embeddings(text):
+     response = openai.embeddings.create(model=EMBEDDINGS_MODEL, input=[text])
+     embeddings = response.data[0].embedding
+     return embeddings
+
+ def collect_all_text(query_results):
+     texts = [data['metadata'].get('text', 'No text metadata found') for data in query_results["matches"]]
+     all_context = "\n".join(texts)
+     return all_context
+
+
+ def get_db_results_new_prompt(user_input):
+     with st.spinner("Querying knowledge base..."):
+         query_embeddings = generate_embeddings(user_input)
+         query_results = query_pinecone(index, query_embeddings, db_results)
+         prompt_and_results = "<context>" + collect_all_text(query_results) + "</context>\n<query>" + user_input + "</query>"
+         st.session_state.query_performed = True  # Set the flag to indicate that the query has been performed
+         # st.session_state.generated.append(output)
+     return prompt_and_results  #, query_results
+
+ def reset_chat():
+     st.session_state['messages'] = []
+     st.session_state.query_performed = False
+     st.session_state.query_pinecone_toggle = True
+     st.session_state['first_input_received'] = False
+     st.rerun()
+
+ def remove_context_tags_and_return_removed_text(text):
+     """
+     Remove <context> tags and the content between them, and return both the cleaned text and the removed content.
+     """
+     # Find all occurrences of text within <context> tags
+     removed_texts = re.findall(r'<context>(.*?)</context>', text, flags=re.DOTALL)
+     # Remove <context> tags and the content between them
+     clean_text = re.sub(r'<context>.*?</context>', '', text, flags=re.DOTALL)
+     # Join the removed text pieces into a single string, assuming there could be more than one <context> tag
+     removed_text = "\n".join(removed_texts)
+     # return clean_text, removed_text.strip()
+     return clean_text
+
+ def clean_text(text):
+     # Remove XML tags
+     text_without_tags = re.sub('<.*?>', '', text)
+     # Strip leading and trailing spaces
+     cleaned_text = text_without_tags.strip()
+     return cleaned_text
+
+ def truncate_prompt_last_tokens(prompt, max_tokens=5000):
+     encoding = tiktoken.get_encoding("cl100k_base")
+     encoded_prompt = encoding.encode(prompt)
+
+     if len(encoded_prompt) <= max_tokens:
+         return prompt
+
+     truncated_encoded_prompt = encoded_prompt[-max_tokens:]
+     truncated_prompt = encoding.decode(truncated_encoded_prompt)
+
+     # Find the first complete sentence boundary, ignoring punctuation that is absent (str.find returns -1)
+     punctuation_positions = [p for p in (truncated_prompt.find('.'), truncated_prompt.find('?'), truncated_prompt.find('!')) if p != -1]
+     if punctuation_positions:
+         truncated_prompt = truncated_prompt[min(punctuation_positions) + 1:].strip()
+
+     return truncated_prompt
+
+ def generate_response(user_input):
+     chain = prompt | chat
+     try:
+         # Truncate the prompt to the last history and user query
+         truncated_prompt = truncate_prompt_last_tokens("\n".join([f"{role}: {msg}" for role, msg in st.session_state.messages]), max_tokens=5000)
+
+         for chunk in chain.stream({"text": truncated_prompt}):
+             content = chunk.content
+             # Replace $ in content to avoid LaTeX interpretation
+             content = content.replace("$", "\\$")
+             if content:
+                 yield content
+
+         st.session_state['query_pinecone_toggle'] = False
+     except Exception as e:
+         # Handle the exception
+         error_message = f"An error occurred while generating the response: {str(e)}"
+         yield error_message
+         # You can also log the error or take any other necessary action
+         print(error_message)
+
+
+
+ ### App UI:
+
+ index = initialize_pinecone(pinecone_api_key, pinecone_index_name)
+
+ # Initialize session state variables if they don't exist
+ if 'entered_prompt' not in st.session_state:
+     st.session_state.entered_prompt = ""
+ if 'query_performed' not in st.session_state:
+     st.session_state.query_performed = False
+ if 'query_pinecone_toggle' not in st.session_state:
+     st.session_state.query_pinecone_toggle = True
+ if 'first_input_received' not in st.session_state:
+     st.session_state['first_input_received'] = False
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+
+
+
+ st.title("HUD Assistant")
+ # display questions
+
+ # st.markdown("---\n## Chat:")
+
+ with st.sidebar:
+     # st.sidebar.page_link("pages/11sl.py", label="Text-to-Speech Converter App")
+     # if st.button("Home"):
+     #     st.switch_page("streamlit_app_fb.py")
+     # if st.button("Phrase Search"):
+     #     st.switch_page("pages/search_database_streamlit.py")
+     st.write("Welcome to the HUD Assistant, user, email")
+     with st.expander("About"):
+         st.markdown("""
+ - This app is designed to help you find information about HUD programs and policies.
+ - Enter your question in the chat box on the right.
+ - Choose how many pages of "Results" you would like to get from the knowledge base and add to the AI prompt. Increase the number to extract more information from the knowledge base.
+
+ - After the first prompt, choose whether you want another knowledge base search for the next prompt using the button below.
+ """)
+     with st.expander("Example Questions"):
+         for question in questions:
+             st.markdown(question)
+     st.markdown("## Settings:")
+     db_results = st.number_input("Results to get from knowledge base:", value=5, min_value=1)
+     if st.button("Reset Chat"):
+         reset_chat()
+
+ # Display previous messages
+ for role, message in st.session_state.messages:
+     avatar_path = "q-icon.svg" if role == "user" else "a-icon.svg" if role == "assistant" else None
+     with st.chat_message(name=role, avatar=avatar_path):
+         message_clean0 = remove_context_tags_and_return_removed_text(message)
+         message_clean1 = clean_text(message_clean0)
+         st.markdown(message_clean1)
+
+ chat = ChatGroq(temperature=0.1, groq_api_key=groq_api_key, model_name="llama3-70b-8192")
+ system = system_prompt
+ human = "{text}"
+ prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
+
+ if user_prompt := st.chat_input("What can I help you with about HUD?"):
+     original_user_prompt = user_prompt
+
+     if st.session_state['query_pinecone_toggle']:
+         user_prompt = get_db_results_new_prompt(user_prompt)
+     else:
+         user_prompt = "<query>" + user_prompt + "</query>"
+     # truncated_user_prompt = truncate_prompt(user_prompt, max_tokens=4990)
+     st.session_state.messages.append(("user", user_prompt))
+     st.chat_message("user", avatar="q-icon.svg").markdown(original_user_prompt)
+     with st.spinner("Generating response..."):
+         try:
+             response = st.write_stream(generate_response(user_prompt))
+             # If no exception occurred, append the response to the messages
+             st.session_state.messages.append(("assistant", response))
+         except Exception as e:
+             error_message = f"An error occurred while generating the response: {str(e)}"
+             st.error(error_message)
+             response = ""
+             # You can also log the error or take any other necessary action
+             print(error_message)
+     # print(st.session_state)
+     # print(st.session_state.messages)
+ with st.sidebar:
+     st.markdown("Query knowledge base for next message:")
+     col1, col2, col3 = st.columns([0.3, 0.2, 0.5])
+     with col1:
+         if st.button('Yes'):
+             st.session_state['query_pinecone_toggle'] = True
+
+     with col2:
+         if st.button('No'):
+             st.session_state['query_pinecone_toggle'] = False
+
+     with col3:
+         if st.session_state['query_pinecone_toggle']:
+             st.markdown("<span style='color: green;'>☑<sup><sub> Knowledge base will be queried for the next message</sub></sup></span>", unsafe_allow_html=True)
+         else:
+             st.markdown("<span style='color:#8B8000;'>☐<sup><sub> Knowledge base will NOT be queried for the next message</sub></sup></span>", unsafe_allow_html=True)
+ st.markdown("Support: https://docmind.ai")
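Taken together, app.py is a small retrieval-augmented chat app: the question is embedded with OpenAI's text-embedding-3-small model, the embedding is matched against the Pinecone index, the retrieved text is wrapped in <context>/<query> tags, and the combined prompt is sent through a ChatGroq (llama3-70b-8192) chain. A minimal sketch of one such turn outside Streamlit, mirroring the helpers above; answer() is an illustrative name, the keys, index name, and shortened system prompt are placeholders, an OPENAI_API_KEY is assumed in the environment, and invoke() is used here instead of the streaming in generate_response():

    import openai
    import pinecone
    from langchain_groq import ChatGroq
    from langchain_core.prompts import ChatPromptTemplate

    # Placeholders: substitute real keys and the real index name.
    index = pinecone.Pinecone(api_key="PINECONE_API_KEY").Index("PINECONE_INDEX_NAME")
    chat = ChatGroq(temperature=0.1, groq_api_key="GROQ_API_KEY", model_name="llama3-70b-8192")
    # Shortened placeholder; app.py uses the full HUD system_prompt defined above.
    prompt = ChatPromptTemplate.from_messages([("system", "You are a HUD assistant."), ("human", "{text}")])
    chain = prompt | chat

    def answer(question: str, top_k: int = 5) -> str:
        # Embed the question and pull the top_k matching chunks from the Pinecone index.
        vector = openai.embeddings.create(model="text-embedding-3-small", input=[question]).data[0].embedding
        matches = index.query(vector=vector, top_k=top_k, include_metadata=True)["matches"]
        context = "\n".join(m["metadata"].get("text", "") for m in matches)
        # Same prompt shape as get_db_results_new_prompt(): <context>...</context> then <query>...</query>.
        return chain.invoke({"text": f"<context>{context}</context>\n<query>{question}</query>"}).content

    print(answer("What are the eligibility requirements for the Housing Choice Voucher Program (Section 8)?"))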
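The chat history sent to the model is also capped by tokens rather than by message count: truncate_prompt_last_tokens() keeps only the last max_tokens tokens of the transcript and then strips any leading partial sentence. A small self-contained sketch of the core idea (last_n_tokens is an illustrative name):

    import tiktoken

    def last_n_tokens(text: str, max_tokens: int = 5000) -> str:
        # Keep only the most recent max_tokens tokens, as truncate_prompt_last_tokens() does in app.py.
        enc = tiktoken.get_encoding("cl100k_base")
        tokens = enc.encode(text)
        return text if len(tokens) <= max_tokens else enc.decode(tokens[-max_tokens:])

    transcript = "user: first question\nassistant: a long answer ...\nuser: follow-up question"
    print(last_n_tokens(transcript, max_tokens=10))  # only the tail of the transcript survives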
property-icon.ico ADDED
q-icon.svg ADDED