Spaces:

Athsara
/

DOGE-grok-ai-dev

Running

App Files Files Community

Athsara commited on Dec 15, 2024

Commit

5e416e3

verified ·

1 Parent(s): e7afd7f

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -173

app.py CHANGED Viewed

@@ -1,173 +1,173 @@
-import os
-import streamlit as st
-from dotenv import load_dotenv
-from PyPDF2 import PdfReader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings  # Changed to HuggingFace
-from langchain.vectorstores import FAISS
-from langchain.chat_models import ChatOpenAI  # For LLM
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
-from htmlTemplates import css, bot_template, user_template
-# Function to extract text from PDF documents
-def get_pdf_text(pdf_docs):
-    text = ""
-    for pdf in pdf_docs:
-        pdf_reader = PdfReader(pdf)
-        for page in pdf_reader.pages:
-            extracted_text = page.extract_text()
-            if extracted_text:
-                text += extracted_text
-    return text
-# Function to split text into manageable chunks
-def get_text_chunks(text):
-    text_splitter = CharacterTextSplitter(
-        separator="\n",
-        chunk_size=1000,
-        chunk_overlap=200,
-        length_function=len
-    )
-    chunks = text_splitter.split_text(text)
-    return chunks
-# Function to create a vector store using HuggingFace embeddings
-def get_vectorstore(text_chunks, huggingface_api_key):
-    embeddings = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-MiniLM-L6-v2",  # Choose an appropriate model
-        model_kwargs={"use_auth_token": huggingface_api_key}
-    )
-    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-    return vectorstore
-# Function to initialize the conversational retrieval chain with GrokAI
-def get_conversation_chain(vectorstore, grok_api_key, grok_api_base):
-    llm = ChatOpenAI(
-        openai_api_key=grok_api_key,
-        openai_api_base=grok_api_base,
-        model_name="grok-beta",  # Specify GrokAI's model
-        temperature=0.5
-    )
-    memory = ConversationBufferMemory(
-        memory_key='chat_history', return_messages=True
-    )
-    conversation_chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,  # Use the configured GrokAI LLM
-        retriever=vectorstore.as_retriever(),
-        memory=memory
-    )
-    return conversation_chain
-# Function to handle user input and generate responses
-def handle_userinput(user_question):
-    if st.session_state.conversation is None:
-        st.warning("Documents are still being processed. Please wait.")
-        return
-    response = st.session_state.conversation({'question': user_question})
-    st.session_state.chat_history = response['chat_history']
-# Function triggered when the user presses Enter in the input box
-def on_enter():
-    user_question = st.session_state.user_question
-    if user_question:
-        handle_userinput(user_question)
-        st.session_state.user_question = ""  # Clear the input box
-# Function to load and process PDF documents
-def load_and_process_pdfs(folder_path, huggingface_api_key, grok_api_key, grok_api_base):
-    pdf_files = [file for file in os.listdir(folder_path) if file.lower().endswith('.pdf')]
-    if not pdf_files:
-        st.error(f"No PDF files found in the directory: {folder_path}")
-        return
-    pdf_docs = []
-    for file in pdf_files:
-        file_path = os.path.join(folder_path, file)
-        pdf_docs.append(file_path)
-    with st.spinner("Processing documents..."):
-        # Extract text from PDFs
-        with st.spinner("Extracting text from PDFs..."):
-            pdf_file_objects = [open(file, 'rb') for file in pdf_docs]
-            raw_text = get_pdf_text(pdf_file_objects)
-            # Close the files after reading
-            for f in pdf_file_objects:
-                f.close()
-        # Split text into chunks
-        with st.spinner("Splitting text into chunks..."):
-            text_chunks = get_text_chunks(raw_text)
-        # Create vector store using HuggingFace embeddings
-        with st.spinner("Creating vector store..."):
-            vectorstore = get_vectorstore(text_chunks, huggingface_api_key)
-        # Initialize conversation chain with GrokAI LLM
-        with st.spinner("Initializing conversation chain..."):
-            st.session_state.conversation = get_conversation_chain(vectorstore, grok_api_key, grok_api_base)
-    st.success("Documents processed successfully!")
-# Function to display chat history with auto-scrolling
-def display_chat_history():
-    if st.session_state.chat_history:
-        for i, message in enumerate(st.session_state.chat_history):
-            if i % 2 == 0:
-                st.markdown(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
-            else:
-                st.markdown(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
-        # Inject JavaScript to scroll the entire page to the bottom
-        scroll_script = """
-        <script>
-            // Function to scroll to the bottom of the page
-            function scrollToBottom() {
-                window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
-            }
-            // Delay to ensure the DOM is fully rendered
-            setTimeout(scrollToBottom, 100);
-        </script>
-        """
-        st.markdown(scroll_script, unsafe_allow_html=True)
-# Main function to run the Streamlit app
-def main():
-    load_dotenv()
-    # Retrieve credentials from .env
-    grok_api_key = os.getenv("GROK_API_KEY")
-    grok_api_base = "https://api.x.ai/v1"  # GrokAI's API base URL
-    huggingface_api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-    st.set_page_config(page_title="Chat with AI Tax Agent", page_icon=":books:")
-    st.write(css, unsafe_allow_html=True)
-    if "conversation" not in st.session_state:
-        st.session_state.conversation = None
-    if "chat_history" not in st.session_state:
-        st.session_state.chat_history = []
-    # Title Section
-    st.header("Chat with AI Tax Agent :books:")
-    # Automatically load and process PDFs on startup
-    if st.session_state.conversation is None:
-        documents_folder = "./documents/"  # Specify your documents folder path here
-        load_and_process_pdfs(documents_folder, huggingface_api_key, grok_api_key, grok_api_base)
-    # Chat History Section
-    display_chat_history()
-    # Input Box Section
-    st.text_input(
-        "Ask a question about your documents:",
-        key='user_question',
-        on_change=on_enter
-    )
-if __name__ == '__main__':
-    main()

+import os
+import streamlit as st
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings  # Changed to HuggingFace
+from langchain.vectorstores import FAISS
+from langchain.chat_models import ChatOpenAI  # For LLM
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from htmlTemplates import css, bot_template, user_template
+# Function to extract text from PDF documents
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            extracted_text = page.extract_text()
+            if extracted_text:
+                text += extracted_text
+    return text
+# Function to split text into manageable chunks
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+# Function to create a vector store using HuggingFace embeddings
+def get_vectorstore(text_chunks, huggingface_api_key):
+    embeddings = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2",  # Choose an appropriate model
+        model_kwargs={"use_auth_token": huggingface_api_key}
+    )
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vectorstore
+# Function to initialize the conversational retrieval chain with GrokAI
+def get_conversation_chain(vectorstore, grok_api_key, grok_api_base):
+    llm = ChatOpenAI(
+        openai_api_key=grok_api_key,
+        openai_api_base=grok_api_base,
+        model_name="grok-beta",  # Specify GrokAI's model
+        temperature=0.5
+    )
+    memory = ConversationBufferMemory(
+        memory_key='chat_history', return_messages=True
+    )
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,  # Use the configured GrokAI LLM
+        retriever=vectorstore.as_retriever(),
+        memory=memory
+    )
+    return conversation_chain
+# Function to handle user input and generate responses
+def handle_userinput(user_question):
+    if st.session_state.conversation is None:
+        st.warning("Documents are still being processed. Please wait.")
+        return
+    response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+# Function triggered when the user presses Enter in the input box
+def on_enter():
+    user_question = st.session_state.user_question
+    if user_question:
+        handle_userinput(user_question)
+        st.session_state.user_question = ""  # Clear the input box
+# Function to load and process PDF documents
+def load_and_process_pdfs(folder_path, huggingface_api_key, grok_api_key, grok_api_base):
+    pdf_files = [file for file in os.listdir(folder_path) if file.lower().endswith('.pdf')]
+    if not pdf_files:
+        st.error(f"No PDF files found in the directory: {folder_path}")
+        return
+    pdf_docs = []
+    for file in pdf_files:
+        file_path = os.path.join(folder_path, file)
+        pdf_docs.append(file_path)
+    with st.spinner("Processing documents..."):
+        # Extract text from PDFs
+        with st.spinner("Extracting text from PDFs..."):
+            pdf_file_objects = [open(file, 'rb') for file in pdf_docs]
+            raw_text = get_pdf_text(pdf_file_objects)
+            # Close the files after reading
+            for f in pdf_file_objects:
+                f.close()
+        # Split text into chunks
+        with st.spinner("Splitting text into chunks..."):
+            text_chunks = get_text_chunks(raw_text)
+        # Create vector store using HuggingFace embeddings
+        with st.spinner("Creating vector store..."):
+            vectorstore = get_vectorstore(text_chunks, huggingface_api_key)
+        # Initialize conversation chain with GrokAI LLM
+        with st.spinner("Initializing conversation chain..."):
+            st.session_state.conversation = get_conversation_chain(vectorstore, grok_api_key, grok_api_base)
+    st.success("Documents processed successfully!")
+# Function to display chat history with auto-scrolling
+def display_chat_history():
+    if st.session_state.chat_history:
+        for i, message in enumerate(st.session_state.chat_history):
+            if i % 2 == 0:
+                st.markdown(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+            else:
+                st.markdown(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+        # Inject JavaScript to scroll the entire page to the bottom
+        scroll_script = """
+        <script>
+            // Function to scroll to the bottom of the page
+            function scrollToBottom() {
+                window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
+            }
+            // Delay to ensure the DOM is fully rendered
+            setTimeout(scrollToBottom, 100);
+        </script>
+        """
+        st.markdown(scroll_script, unsafe_allow_html=True)
+# Main function to run the Streamlit app
+def main():
+    load_dotenv()
+    # Retrieve credentials from .env
+    grok_api_key = os.getenv("GROK_API_KEY")
+    grok_api_base = "https://api.x.ai/v1"  # GrokAI's API base URL
+    huggingface_api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    st.set_page_config(page_title="Chat with AI Tax Agent", page_icon=":books:")
+    st.write(css, unsafe_allow_html=True)
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+    # Title Section
+    st.header("Chat with AI Tax Agent :books:")
+    # Automatically load and process PDFs on startup
+    if st.session_state.conversation is None:
+        documents_folder = "./documents/"  # Specify your documents folder path here
+        load_and_process_pdfs(documents_folder, huggingface_api_key, grok_api_key, grok_api_base)
+    # Chat History Section
+    display_chat_history()
+    # Input Box Section
+    st.text_input(
+        "Ask a question about your documents:",
+        key='user_question',
+        on_change=on_enter
+    )
+if __name__ == '__main__':
+    main()