Spaces:
Configuration error
Configuration error
truevis
committed on
Commit
•
fe0b5ee
1
Parent(s):
5fe11e1
new
Browse files- .streamlit/config.toml +6 -0
- a-icon.svg +1 -0
- app.py +266 -0
- property-icon.ico +0 -0
- q-icon.svg +1 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Streamlit app configuration (.streamlit/config.toml).

[theme]
# Force the light theme regardless of the viewer's OS/browser preference.
base = "light"

[client]
# Hide Streamlit's automatic multi-page navigation; this app renders its
# own sidebar content instead.
showSidebarNavigation = false
|
5 |
+
|
6 |
+
|
a-icon.svg
ADDED
app.py
ADDED
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain_groq import ChatGroq
|
3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
4 |
+
import pinecone
|
5 |
+
import hmac
|
6 |
+
import tiktoken
|
7 |
+
import re
|
8 |
+
import openai
|
9 |
+
# import firebase_stripe_auth
|
10 |
+
|
11 |
+
# OpenAI model used to embed both documents and queries for the vector search.
EMBEDDINGS_MODEL = 'text-embedding-3-small'
# All credentials come from Streamlit's secrets store (.streamlit/secrets.toml).
groq_api_key = st.secrets["GROQ_API_KEY"]
pinecone_api_key = st.secrets["PINECONE_API_KEY"]
pinecone_index_name = st.secrets["PINECONE_INDEX_NAME"]
streamlit_api_key = st.secrets["STREAMLIT_API_KEY"]  # NOTE(review): not referenced elsewhere in this file — confirm before removing


# System message sent with every LLM call: scopes the assistant to HUD topics
# and instructs it to echo "Source:" URLs from retrieved context verbatim.
system_prompt = """<system_prompt> You are an AI assistant with expertise in U.S. Department of Housing and Urban Development (HUD) programs and policies. Provide accurate, helpful information to assist people working with HUD and its clients in the United States. <knowledge> Understand HUD's major program offices: - Community Planning and Development (CDBG, HOME, Shelter Plus Care, ESG, Mod Rehab SRO, HOPWA) - Housing (FHA, Fannie Mae/Freddie Mac, Section 202, Section 811, Project-Based Section 8) - Public and Indian Housing (Public housing, HOPE VI, Housing Choice Vouchers, Native American/Hawaiian/Alaskan housing grants) - Fair Housing and Equal Opportunity - Policy Development and Research - Ginnie Mae, Healthy Homes and Lead Hazard Control, Partnership for Advancing Technology in Housing </knowledge> <communication> - Assistant answers ONLY in the context of U.S. Department of Housing and Urban Development (HUD) expertise and issues within the United States - Explain HUD programs clearly and accurately - Provide relevant details and suggest resources - Maintain professional, respectful, and empathetic communication - Use a neutral and professional tone in all responses - List each URL verbatim as it appears after "Source: ". Example: If the data contain "Source: https://docmind.ai/hud/pages/CFR-2018-title24-vol4-part982_page_45.pdf", respond with "\nSource 1: https://docmind.ai/hud/pages/CFR-2018-title24-vol4-part982_page_45.pdf" and so on. 
</communication> <task_completion> - Answer questions about HUD programs and policies in the United States - Determine client eligibility for assistance in the United States - Guide applications and participation in HUD programs within the United States - Troubleshoot HUD-related housing issues in the United States - Explain tenant and landlord rights and responsibilities under HUD in the United States </task_completion> Be a knowledgeable, helpful resource referencing the <context> about <query> specifically for HUD issues in the United States. </system_prompt>"""

# Canned example prompts shown in the sidebar "Example Questions" expander.
questions = [
    "What are the eligibility requirements for the Housing Choice Voucher Program (Section 8), and how can I assist a client in applying for this program?",
    "My client is facing discrimination in his HUD-assisted housing based on his race. What steps can he take to file a complaint with the Office of Fair Housing and Equal Opportunity, and what support can HUD provide?",
    "Can you explain the differences between the public housing program and the Section 202 Supportive Housing for the Elderly program, and help me determine which one might be a better fit for my elderly client?",
    "I'm working with a community organization that wants to apply for a Community Development Block Grant (CDBG) to revitalize a low-income neighborhood. What are some key things we should know about the application process and how the funds can be used?",
    "List the PDF source pages and their URLS for the previous chat response from Assistant. List the full exact URLs from the source data.",
    "summarize the previous chat response from Assistant",
    "generate a draft email focusing directly on the subject or main question. Omit any initial greetings or pleasantries, such as 'I hope this message finds you well.'"
]
|
29 |
+
|
30 |
+
def check_password():
    """Gate the app behind a shared password.

    Renders a password input until the value matching
    ``st.secrets["password"]`` has been entered; returns ``True`` once the
    current session is authenticated, ``False`` otherwise.
    """

    def _on_password_change():
        """Validate the entered password (constant-time) and record the result."""
        entered = st.session_state["password"]
        ok = hmac.compare_digest(entered, st.secrets["password"])
        st.session_state["password_correct"] = ok
        if ok:
            # Don't keep the plaintext password around in session state.
            del st.session_state["password"]

    # Already validated earlier in this session.
    if st.session_state.get("password_correct", False):
        return True

    # Not validated yet: show the input and, after a failed attempt, an error.
    st.text_input(
        "Password for HUD Assistant",
        type="password",
        on_change=_on_password_change,
        key="password",
    )
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False
|
52 |
+
|
53 |
+
|
54 |
+
# Configure the browser tab and layout. Must run before other Streamlit calls.
st.set_page_config(
    page_title="HUD Assistant",
    page_icon="property-icon.ico",
    layout="wide",
    initial_sidebar_state="expanded"
)


if not check_password():
    st.stop()  # Do not continue if check_password is not True.
|
64 |
+
|
65 |
+
|
66 |
+
def initialize_pinecone(api_key, index_name):
    """Create a Pinecone client and return a handle to the named index."""
    client = pinecone.Pinecone(api_key=api_key)
    index_handle = client.Index(index_name)
    return index_handle
|
69 |
+
|
70 |
+
def query_pinecone(index, embeddings, top_k):
    """Return the top_k nearest matches (with metadata) for an embedding vector."""
    return index.query(vector=embeddings, top_k=top_k, include_metadata=True)
|
72 |
+
|
73 |
+
def format_query_results(query_results):
    """Flatten Pinecone match records into plain dicts.

    Missing metadata fields are replaced with explanatory placeholder strings
    rather than raising.
    """
    formatted = []
    for match in query_results["matches"]:
        meta = match['metadata']
        formatted.append({
            "Document ID": match['id'],
            "Page": meta.get('page', 'No page metadata found'),
            "Enhancement": meta.get('enh', 'No enhancement metadata found'),
            "Score": match['score'],
            "Text Metadata": meta.get('text', 'No text metadata found'),
        })
    return formatted
|
79 |
+
|
80 |
+
def generate_embeddings(text):
    """Embed *text* with the configured OpenAI embeddings model.

    Returns the embedding vector (list of floats) for the single input.
    """
    result = openai.embeddings.create(model=EMBEDDINGS_MODEL, input=[text])
    return result.data[0].embedding
|
84 |
+
|
85 |
+
def collect_all_text(query_results):
    """Join the 'text' metadata of every match into one newline-separated string."""
    pieces = []
    for match in query_results["matches"]:
        pieces.append(match['metadata'].get('text', 'No text metadata found'))
    return "\n".join(pieces)
|
89 |
+
|
90 |
+
|
91 |
+
def get_db_results_new_prompt(user_input):
    """Retrieve knowledge-base context for *user_input* and wrap both in tags.

    Embeds the query, fetches `db_results` matches from the Pinecone index,
    and returns "<context>…</context>\\n<query>…</query>" ready for the LLM.
    """
    with st.spinner("Querying knowledge base..."):
        embeddings = generate_embeddings(user_input)
        matches = query_pinecone(index, embeddings, db_results)
        tagged_prompt = (
            "<context>" + collect_all_text(matches) + "</context>"
            + "\n<query>" + user_input + "</query>"
        )
        # Record that a retrieval has been performed this session.
        st.session_state.query_performed = True
        return tagged_prompt
|
99 |
+
|
100 |
+
def reset_chat():
    """Clear the conversation and restore the default per-session flags, then rerun."""
    st.session_state['messages'] = []
    st.session_state['query_performed'] = False
    st.session_state['query_pinecone_toggle'] = True
    st.session_state['first_input_received'] = False
    st.rerun()
|
106 |
+
|
107 |
+
def remove_context_tags_and_return_removed_text(text):
    """Strip every ``<context>...</context>`` section from *text*.

    Returns the text with the tags and their contents removed. Despite the
    historical name, the removed content is NOT returned — the tuple return
    was abandoned (callers use only the cleaned text), so the dead
    ``re.findall`` collection pass has been dropped.
    """
    # DOTALL so a <context> block spanning multiple lines is matched too;
    # non-greedy so multiple blocks are removed independently.
    return re.sub(r'<context>.*?</context>', '', text, flags=re.DOTALL)
|
119 |
+
|
120 |
+
def clean_text(text):
    """Drop all angle-bracket tags from *text* and trim surrounding whitespace."""
    without_tags = re.sub('<.*?>', '', text)
    return without_tags.strip()
|
126 |
+
|
127 |
+
def truncate_prompt_last_tokens(prompt, max_tokens=5000):
    """Keep at most the last *max_tokens* tokens of *prompt*.

    Tokenizes with tiktoken's cl100k_base encoding. If the prompt fits, it is
    returned unchanged. Otherwise the last max_tokens tokens are decoded and
    the (likely partial) leading sentence is dropped by cutting at the
    earliest sentence-ending punctuation.

    Bug fix: the original used ``min(find('.'), find('?'), find('!'))``.
    ``str.find`` returns -1 for an absent character, so whenever ANY of the
    three characters was missing the minimum was -1 and the partial leading
    sentence was never stripped. -1 results are now excluded before taking
    the minimum.
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    encoded_prompt = encoding.encode(prompt)

    if len(encoded_prompt) <= max_tokens:
        return prompt

    truncated_prompt = encoding.decode(encoded_prompt[-max_tokens:])

    # Find the first complete sentence boundary among '.', '?', '!',
    # ignoring characters that do not occur at all.
    cut_points = [pos for pos in (truncated_prompt.find(c) for c in ".?!") if pos != -1]
    if cut_points:
        truncated_prompt = truncated_prompt[min(cut_points) + 1:].strip()

    return truncated_prompt
|
143 |
+
|
144 |
+
def generate_response(user_input):
    """Stream the LLM reply for the current conversation, chunk by chunk.

    Builds a single text blob from the whole message history (truncated to
    the last 5000 tokens), streams it through the prompt|chat chain, and
    yields each non-empty chunk. On success the knowledge-base toggle is
    switched off for the next message; on failure the error text itself is
    yielded so the UI still renders something.
    """
    chain = prompt | chat
    try:
        # Flatten the chat history to "role: message" lines and keep only the tail.
        history_text = "\n".join(f"{role}: {msg}" for role, msg in st.session_state.messages)
        trimmed_history = truncate_prompt_last_tokens(history_text, max_tokens=5000)

        for chunk in chain.stream({"text": trimmed_history}):
            piece = chunk.content
            # Escape $ so Streamlit's markdown does not treat it as LaTeX.
            piece = piece.replace("$", "\\$")
            if piece:
                yield piece

        st.session_state['query_pinecone_toggle'] = False
    except Exception as e:
        error_message = f"An error occurred while generating the response: {str(e)}"
        yield error_message
        # You can also log the error or take any other necessary action
        print(error_message)
|
164 |
+
|
165 |
+
|
166 |
+
|
167 |
+
### App UI:

# Connect to the Pinecone vector index once per script run.
index = initialize_pinecone(pinecone_api_key, pinecone_index_name)

# Initialize session state variables if they don't exist.
if 'entered_prompt' not in st.session_state:
    st.session_state.entered_prompt = ""
if 'query_performed' not in st.session_state:
    st.session_state.query_performed = False
# Whether the next user message triggers a knowledge-base retrieval.
if 'query_pinecone_toggle' not in st.session_state:
    st.session_state.query_pinecone_toggle = True
if 'first_input_received' not in st.session_state:
    st.session_state['first_input_received'] = False
# Conversation history as (role, message) tuples.
if "messages" not in st.session_state:
    st.session_state.messages = []
|
182 |
+
|
183 |
+
|
184 |
+
|
185 |
+
|
186 |
+
st.title("HUD Assistant")
#display questions

# st.markdown("---\n## Chat:")

# Sidebar: help text, example questions, and retrieval settings.
with st.sidebar:
    # st.sidebar.page_link("pages/11sl.py", label="Text-to-Speech Converter App")
    # if st.button("Home"):
    #     st.switch_page("streamlit_app_fb.py")
    # if st.button("Phrase Search"):
    #     st.switch_page("pages/search_database_streamlit.py")
    st.write("Welcome to the HUD Assistant, user, email")
    with st.expander("About"):
        st.markdown("""
- This app is designed to help you find information about HUD programs and policies.
- Enter your question in the chat box on the right.
- Choose how many pages of "Results" you would like to get from the knowledge base and added to the AI prompt. Increase the number to extract more information from the knowledge base.

- After the first prompt, choose whether you want another knowledge base search for the next prompt using the button below.
""")
    with st.expander("Example Questions"):
        for question in questions:
            st.markdown(question)
    st.markdown("## Settings:")
    # top_k for the Pinecone query — how many chunks get added to the prompt.
    db_results = st.number_input("Results to get from knowledge base:", value=5 , min_value=1)
    if st.button("Reset Chat"):
        reset_chat()
|
213 |
+
|
214 |
+
# Display previous messages.
for role, message in st.session_state.messages:
    # Avatar matches the speaker; anything else falls back to the default.
    avatar_path = "q-icon.svg" if role == "user" else "a-icon.svg" if role == "assistant" else None
    with st.chat_message(name=role, avatar=avatar_path):
        # Stored user messages embed retrieved <context>; strip it (and any
        # remaining tags) so only the conversational text is rendered.
        message_clean0 = remove_context_tags_and_return_removed_text(message)
        message_clean1 = clean_text(message_clean0)
        st.markdown(message_clean1)
|
221 |
+
|
222 |
+
# LLM + prompt template: Groq-hosted Llama 3 70B at low temperature for
# repeatable, factual answers.
chat = ChatGroq(temperature=0.1, groq_api_key=groq_api_key, model_name="llama3-70b-8192")
system = system_prompt
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])
|
226 |
+
|
227 |
+
# Main chat handler: runs once per submitted message.
if user_prompt := st.chat_input("What can I help you with about HUD?"):
    # Keep the raw text for display; user_prompt gets augmented for the LLM.
    original_user_prompt = user_prompt

    if st.session_state['query_pinecone_toggle']:
        # Retrieval ON: prepend knowledge-base context to the query.
        user_prompt = get_db_results_new_prompt(user_prompt)
    else:
        user_prompt = "<query>" + user_prompt + "</query>"
    # truncated_user_prompt = truncate_prompt(user_prompt, max_tokens=4990)
    # The augmented (context-bearing) prompt is what goes into history.
    st.session_state.messages.append(("user", user_prompt))
    st.chat_message("user" , avatar= "q-icon.svg").markdown(original_user_prompt)
    with st.spinner("Generating response..."):
        try:
            response = st.write_stream(generate_response(user_prompt))
            # If no exception occurred, append the response to the messages.
            st.session_state.messages.append(("assistant", response))
        except Exception as e:
            error_message = f"An error occurred while generating the response: {str(e)}"
            st.error(error_message)
            response = ""
            # You can also log the error or take any other necessary action
            print(error_message)
    # print(st.session_state)
    # print(st.session_state.messages)
    # After each exchange, let the user choose whether the NEXT message
    # should trigger another knowledge-base retrieval.
    with st.sidebar:
        st.markdown("Query knowledge base for next message:")
        col1, col2, col3 = st.columns([0.3, 0.2, 0.5])
        with col1:
            if st.button('Yes'):
                st.session_state['query_pinecone_toggle'] = True

        with col2:
            if st.button('No'):
                st.session_state['query_pinecone_toggle'] = False

        with col3:
            # Visual indicator of the current toggle state.
            if st.session_state['query_pinecone_toggle']:
                st.markdown("<span style='color: green;'>☑<sup><sub> Knowledge base will be queried for the next message</sub></sup></span>", unsafe_allow_html=True)
            else:
                st.markdown("<span style='color:#8B8000;'>☐<sup><sub> Knowledge base will NOT be queried for the next message</sub></sup></span>", unsafe_allow_html=True)
        st.markdown("Support: https://docmind.ai")
|
property-icon.ico
ADDED
q-icon.svg
ADDED