import streamlit as st from langchain_groq import ChatGroq from langchain_core.prompts import ChatPromptTemplate import pinecone import hmac import tiktoken import re import openai # import firebase_stripe_auth from utils.firebase_logging import init_firebase, log_to_firestore EMBEDDINGS_MODEL = 'text-embedding-3-small' groq_api_key = st.secrets["GROQ_API_KEY"] pinecone_api_key = st.secrets["PINECONE_API_KEY"] pinecone_index_name = st.secrets["PINECONE_INDEX_NAME"] streamlit_api_key = st.secrets["STREAMLIT_API_KEY"] system_prompt = """ # Electrical Project Assistant ## Assistant - Assistant is an Electrical Project Assistant dedicated to providing accurate and reliable information based on its knowledge base. - Assistant is designed to aid the User in electrical engineering and design questions, focusing on construction codes in Texas, USA. - Assistant has the persona of a wise, experienced electrical engineer, designer, and specification writer. - Assistant answers queries about this project's Construction Documents Specifications ## Assistant's Task - Assistant's primary task is to answer User's questions with clear and reliable responses. - Assistant's responses are solely based on the information available in its knowledge base. ## Constraints - Assistant never makes assumptions or guesses -- it always refers to its knowledge base. - Assistant always provides a reference and link to the knowledge base source in its responses. ## Inability to Respond - If a definitive answer is not available in the knowledge base, Assistant acknowledges its inability and informs the User it cannot provide a response. ## Writing Style - Assistant communicates as a knowledgeable and experienced electrical engineer, familiar with the electrical construction codes in Texas, USA. - Assistant's responses are factual, clear, and concise. ## Hot Keys p = list the PDF source pages and their URLS for the previous chat response from Assistant. List the full exact URLS from the source data. s = summarize the previous chat response from Assistant r = generate a Request for Information (RFI) on the subject asking for clarification on the subject. e = generate a draft email focusing directly on the subject or main question. Omit any initial greetings or pleasantries, such as 'I hope this message finds you well.' t = show any data from the previous chat response in table format ## Response - Assistant always answers questions with clear and reliable responses solely based on the information available in its knowledge base. - Assistant always cites and links to the Source from its knowledge base in its responses. - Assistant always reminds the User to check the original documents to ensure the accuracy of the information provided. - Always Extract and list the source URLs from the provided data. - Search for the exact phrase "Source: " followed by a URL. - List each URL verbatim as it appears after "Source: ". Example: If the data contains "Source: https://www.bigstateelectric.com/ai/assistant/specs/utsa2/pages/UTSA_page_345.pdf", respond with "Source 1: https://www.bigstateelectric.com/ai/assistant/specs/utsa2/pages/UTSA_page_345.pdf" and so on. """ questions = [ "Identify and provide the standard color codes for electrical conductors as specified in the project documents, focusing on the section detailing wire identification for power circuits.", "Summarize all annotations and list the PDF pages where they were made.", "Create RFI on the subject. The RFI creator is 'Big State Electric'", "System installer", "Docking Station", "Outdoor grounding requirements", "List the PDF source pages and their URLS for the previous chat response from Assistant. List the full exact URLs from the source data.", "Draft a formal business email using context provided in the chat. Omit any initial greetings or pleasantries. Use a colon after the \"Dear\" part.", """Write article about the subject in the following format: ## Engaging Article Title ### Overview {give a brief introduction of the article and why the user should read this report} {make this section engaging and create a hook for the reader} ### Section 1 {break the article into sections} {provide details/facts/processes in this section} ... more sections as necessary... ### Takeaways {provide key takeaways from the article} ### References - [Title](url) - [Title](url) - [Title](url) """ ] about = """ - This app is designed to help you find information about the project's Construction Documents Specifications. - Source document: https://www.bigstateelectric.com/ai/specs/uhs/source/uhs_palo_alto.pdf - Enter your question in the chat box on the right. - Hotkeys: e = Email draft. p = List ref URLs. s = summarize response. r = RFI creation. t = show previous data in table. - Choose how many pages of "Results" you would like to get from the knowledge base and added to the AI prompt. Increase the number to extract more information from the knowledge base. - After the first prompt, choose whether you want another knowledge base search for the next prompt using the button below. - The PDF pages from the knowledge base are published online and citable. Each PDF page has a link at the bottom going directly the the PDF source page so it can be seen in context. - This app works best on desktop browsers and is not yet optimized for mobile devices. - BSE Chatbot version 0.1 (c) 2024 """ def check_password(): """Returns `True` if the user had the correct password.""" def password_entered(): """Checks whether a password entered by the user is correct.""" if hmac.compare_digest(st.session_state["password"], st.secrets["password"]): st.session_state["password_correct"] = True del st.session_state["password"] # Don't store the password. else: st.session_state["password_correct"] = False # Return True if the password is validated. if st.session_state.get("password_correct", False): return True # Show input for password. st.text_input( "Password for Assistant", type="password", on_change=password_entered, key="password" ) if "password_correct" in st.session_state: st.error("😕 Password incorrect") # Read the privacy policy text from a file with open("utils/privacy_policy.txt", "r") as file: privacy_policy = file.read() # Display the privacy policy in an accordion with st.expander("Privacy Policy"): st.write(privacy_policy) return False st.set_page_config( page_title="BSE Specification Assistant", page_icon="favicon.ico", layout="wide", initial_sidebar_state="auto" ) if not check_password(): st.stop() # Do not continue if check_password is not True. def initialize_pinecone(api_key, index_name): pinecone_client = pinecone.Pinecone(api_key=api_key) return pinecone_client.Index(index_name) def query_pinecone(index, embeddings, top_k): return index.query(vector=embeddings, top_k=top_k, include_metadata=True) def format_query_results(query_results): formatted_results = [] for data in query_results["matches"]: document_details = {"Document ID": data['id'], "Page": data['metadata'].get('page', 'No page metadata found'), "Enhancement": data['metadata'].get('enh', 'No enhancement metadata found'), "Score": data['score'], "Text Metadata": data['metadata'].get('text', 'No text metadata found')} formatted_results.append(document_details) return formatted_results def generate_embeddings(text): response = openai.embeddings.create(model=EMBEDDINGS_MODEL, input=[text]) embeddings = response.data[0].embedding return embeddings def collect_all_text(query_results): texts = [data['metadata'].get('text', 'No text metadata found') for data in query_results["matches"]] all_context = "\n".join(texts) return all_context def get_db_results_new_prompt(user_input): with st.spinner("Querying knowledge base..."): query_embeddings = generate_embeddings(user_input) query_results = query_pinecone(index, query_embeddings, db_results) prompt_and_results = "" + collect_all_text(query_results) + "\n" + user_input + "" # Set the flag to indicate that the query has been performed st.session_state.query_performed = True # st.session_state.generated.append(output) return prompt_and_results #, query_results def reset_chat(): st.session_state['messages'] = [] st.session_state.query_performed = False st.session_state.query_pinecone_toggle = True st.session_state['first_input_received'] = False st.rerun() # def remove_context_tags_and_return_removed_text(text): # """ # Remove tags and the content between them, and return both the cleaned text and the removed content. # """ # # Find all occurrences of text within tags # removed_texts = re.findall(r'(.*?)', text, flags=re.DOTALL) # # Remove tags and the content between them # clean_text = re.sub(r'.*?', '', text, flags=re.DOTALL) # # Join the removed text pieces into a single string, assuming there could be more than one tag # removed_text = "\n".join(removed_texts) # # return clean_text, removed_text.strip() # return clean_text def remove_context_tags_and_return_removed_text(text): """ Remove tags and the content between them, and return the cleaned text. """ # Remove tags and the content between them clean_text = re.sub(r'.*?', '', text, flags=re.DOTALL) return clean_text def clean_text(text): # Remove XML tags text_without_tags = re.sub('<.*?>', '', text) # Strip leading and trailing spaces cleaned_text = text_without_tags.strip() return cleaned_text def truncate_prompt_last_tokens(prompt, max_tokens=4000): encoding = tiktoken.get_encoding("cl100k_base") encoded_prompt = encoding.encode(prompt) if len(encoded_prompt) <= max_tokens: return prompt truncated_encoded_prompt = encoded_prompt[-max_tokens:] truncated_prompt = encoding.decode(truncated_encoded_prompt) # Find the first complete sentence or thought first_punctuation = min(truncated_prompt.find('.'), truncated_prompt.find('?'), truncated_prompt.find('!')) if first_punctuation != -1: truncated_prompt = truncated_prompt[first_punctuation + 1:].strip() return truncated_prompt def generate_response(user_input): chain = prompt | chat try: # Truncate the prompt to the last history and user query truncated_prompt = truncate_prompt_last_tokens("\n".join([f"{role}: {msg}" for role, msg in st.session_state.messages]), max_tokens=4000) for chunk in chain.stream({"text": truncated_prompt}): content = chunk.content # Replace $ in content to avoid LaTeX interpretation # content = content.replace("$", "\\$") content = replace_dollar_with_fullwidth(content) if content: yield content st.session_state['query_pinecone_toggle'] = False except Exception as e: # Handle the exception error_message = f"An error occurred while generating the response: {str(e)}" message = f"bse,anon,generate_response,{e}" level = "ERROR" log_to_firestore("docmind-421208", message, level) yield error_message # You can also log the error or take any other necessary action print(error_message) def replace_dollar_with_fullwidth(text): return text.replace('$', '$') ### App UI: index = initialize_pinecone(pinecone_api_key, pinecone_index_name) # Initialize session state variables if they don't exist if 'entered_prompt' not in st.session_state: st.session_state.entered_prompt = "" if 'query_performed' not in st.session_state: st.session_state.query_performed = False if 'query_pinecone_toggle' not in st.session_state: st.session_state.query_pinecone_toggle = True if 'first_input_received' not in st.session_state: st.session_state['first_input_received'] = False if "messages" not in st.session_state: st.session_state.messages = [] st.title("BSE Specifications Assistant") st.markdown('''Knowledge Base Source: **University Health Community Hospital and Associated Projects - Palo Alto** - MM #22029.002 - March 22, 2024''') #display questions # st.markdown("---\n## Chat:") with st.sidebar: # st.sidebar.page_link("pages/11sl.py", label="Text-to-Speech Converter App") # if st.button("Home"): # st.switch_page("streamlit_app_fb.py") # if st.button("Phrase Search"): # st.switch_page("pages/search_database_streamlit.py") st.write("Welcome to the BSE Specs Assistant") with st.expander("About"): st.markdown(about) with st.expander("Example Prompts"): for question in questions: # st.markdown(question) st.code(question, language="None", line_numbers=False) st.markdown("## Settings:") db_results = st.number_input("Results to get from knowledge base:", value=5 , min_value=1) if st.button("Reset Chat"): reset_chat() # Display previous messages for role, message in st.session_state.messages: avatar_path = "q-icon.svg" if role == "user" else "a-icon.svg" if role == "assistant" else None with st.chat_message(name=role, avatar=avatar_path): message_clean0 = remove_context_tags_and_return_removed_text(message) message_clean1 = clean_text(message_clean0) message_clean2 = replace_dollar_with_fullwidth(message_clean1) st.markdown(message_clean2) chat = ChatGroq(temperature=0.1, groq_api_key=groq_api_key, model_name="llama3-70b-8192") system = system_prompt human = "{text}" prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)]) if user_prompt := st.chat_input("Prompt about these specifications"): original_user_prompt = user_prompt message = f"bse,anon,p,{user_prompt[:200]}" level = "INFO" log_to_firestore("docmind-421208", message, level) if st.session_state['query_pinecone_toggle']: user_prompt = get_db_results_new_prompt(user_prompt) else: user_prompt = "" + user_prompt + "" # truncated_user_prompt = truncate_prompt(user_prompt, max_tokens=4990) st.session_state.messages.append(("user", user_prompt)) st.chat_message("user" , avatar= "q-icon.svg").markdown(replace_dollar_with_fullwidth(original_user_prompt)) with st.spinner("Generating response..."): try: response = st.write_stream(generate_response(user_prompt)) message = f"bse,anon,r,{response[:200]}" # print(message) level = "INFO" log_to_firestore("docmind-421208", message, level) # If no exception occurred, append the response to the messages st.session_state.messages.append(("assistant", response)) except Exception as e: error_message = f"An error occurred while generating the response: {str(e)}" st.error(error_message) message = f"bse,anon,show_response,{e}" level = "ERROR" log_to_firestore("docmind-421208", message, level) response = "" # You can also log the error or take any other necessary action print(error_message) # print(st.session_state) # print(st.session_state.messages) with st.sidebar: st.markdown("Query knowledge base for next message:") col1, col2, col3 = st.columns([0.3, 0.2, 0.5]) with col1: if st.button('Yes'): st.session_state['query_pinecone_toggle'] = True with col2: if st.button('No'): st.session_state['query_pinecone_toggle'] = False with col3: if st.session_state['query_pinecone_toggle']: st.markdown(" Knowledge base will be queried for the next message", unsafe_allow_html=True) else: st.markdown(" Knowledge base will NOT be queried for the next message", unsafe_allow_html=True) st.markdown("Support: Contact https://truevis.com")