Spaces:

Anne31415
/

Public_BookBot

Sleeping

App Files Files Community

Anne31415 committed on Feb 6

Commit

9a6164d

•

1 Parent(s): fe37ee0

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -65

app.py CHANGED Viewed

@@ -1,9 +1,5 @@
 import streamlit as st
-from sentence_transformers import SentenceTransformer
-import numpy as np
 from PIL import Image
-import pinecone
-import pinecone_text
 import random
 import time
 from dotenv import load_dotenv
@@ -20,6 +16,8 @@ from langchain.callbacks import get_openai_callback
 import os
 import uuid
 import json
 import pandas as pd
 import pydeck as pdk
 from urllib.error import URLError
@@ -38,17 +36,6 @@ if 'chat_history_page3' not in st.session_state:
 if 'session_id' not in st.session_state:
     st.session_state['session_id'] = str(uuid.uuid4())
-from pinecone import Pinecone
-# Initialize Pinecone client
-api_key = os.environ["api_key"]
-# configure client
-pc = Pinecone(api_key=api_key)
-index_name = "canopy--document-uploader"  # Replace with your chosen index name
-index = pc.Index(name=index_name)
 # Step 1: Clone the Dataset Repository
@@ -95,7 +82,7 @@ def load_vector_store(file_path, store_name, force_reload=False):
         #st.text(f"Loaded existing vector store from {vector_store_path}")
     else:
         # Load and process the PDF, then create the vector store
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200, length_function=len)
         text = load_pdf_text(file_path)
         chunks = text_splitter.split_text(text=text)
         embeddings = OpenAIEmbeddings()
@@ -248,23 +235,6 @@ def display_session_id():
     st.sidebar.markdown(f"**Ihre Session ID:** `{session_id}`")
     st.sidebar.markdown("Verwenden Sie diese ID als Referenz bei Mitteilungen oder Rückmeldungen.")
-def query_pinecone(vector, index, top_k=5):
-    # Query Pinecone index for similar vectors
-    query_results = index.query(vector=vector, top_k=top_k)
-    return query_results["matches"]
-from sentence_transformers import SentenceTransformer
-# Initialize the Sentence Transformer model
-model = SentenceTransformer('all-MiniLM-L6-v2')
-def text_to_vector(text):
-    # Convert input text to vector
-    embedding = model.encode(text)
-    return embedding  # No need to convert to list, depending on how you use it later
 def page1():
     try:
@@ -510,52 +480,125 @@ def page2():
 def page3():
     try:
-        st.markdown("""
-            <style>
-            #MainMenu {visibility: hidden;}
-            footer {visibility: hidden;}
-            </style>
-            """, unsafe_allow_html=True)
-        col1, col2 = st.columns([3, 1])
         with col1:
             st.title("Kosten- und Strukturdaten der Krankenhäuser")
         with col2:
-            image_path = 'BinDoc Logo (Quadratisch).png'
-            if os.path.exists(image_path):
-                image = Image.open(image_path)
-                st.image(image, use_column_width='always')
-        display_chat_history(st.session_state.get('chat_history_page3', []))
         query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
         if query:
-            query_vector = text_to_vector(query)
-            # Ensure the vector is in the correct format for Pinecone
-            query_vector = query_vector.tolist() if isinstance(query_vector, np.ndarray) else query_vector
-            # Query the Pinecone index
-            results = pc.query(index=index_name, vector=query_vector, top_k=5)
-            # Process and display results
-            for result in results['matches']:
-                matched_id = result['id']
-                score = result['score']
-                # Assuming you have metadata to display, adjust as necessary
-                st.write(f"Matched ID: {matched_id}, Score: {score}")
-            # Update chat history
-            st.session_state['chat_history_page3'].append(("User", query))
-            for result in results['matches']:
-                st.session_state['chat_history_page3'].append(("Result", f"Matched ID: {result['id']}, Score: {result['score']}"))
-            # Save the updated chat history
-            save_conversation(st.session_state['chat_history_page3'], st.session_state['session_id'])
-    except Exception as e:
-        st.error(f"An unexpected error occurred: {e}")
 def page4():
     try:
@@ -618,4 +661,4 @@ def main():
 if __name__ == "__main__":
-    main()

 import streamlit as st
 from PIL import Image
 import random
 import time
 from dotenv import load_dotenv
 import os
 import uuid
 import json
 import pandas as pd
 import pydeck as pdk
 from urllib.error import URLError
 if 'session_id' not in st.session_state:
     st.session_state['session_id'] = str(uuid.uuid4())
 # Step 1: Clone the Dataset Repository
         #st.text(f"Loaded existing vector store from {vector_store_path}")
     else:
         # Load and process the PDF, then create the vector store
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100, length_function=len)
         text = load_pdf_text(file_path)
         chunks = text_splitter.split_text(text=text)
         embeddings = OpenAIEmbeddings()
     st.sidebar.markdown(f"**Ihre Session ID:** `{session_id}`")
     st.sidebar.markdown("Verwenden Sie diese ID als Referenz bei Mitteilungen oder Rückmeldungen.")
 def page1():
     try:
 def page3():
     """Render the "Kosten- und Strukturdaten der Krankenhäuser" chat page.

     Hides Streamlit's default menu and footer, shows the page title and logo,
     loads the vector store for this page, replays the stored chat history and
     then accepts a question either from the text input or from one of the
     preset question buttons. A non-empty question is answered with the chain
     returned by ``load_chatbot()`` over the five most similar chunks from the
     vector store; the exchange is appended to
     ``st.session_state['chat_history_page3']``, the histories of all three
     pages are passed to ``save_conversation()``, and the new messages plus
     the response time are displayed.

     Any exception is caught and reported through ``st.error`` so a failure on
     this page does not propagate to the caller.
     """
     # Local import keeps this block self-contained; only needed for escaping.
     import html
     try:
         hide_streamlit_style = """
                 <style>
                 #MainMenu {visibility: hidden;}
                 footer {visibility: hidden;}
                 </style>
                 """
         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
         # Title on the left, logo on the right.
         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
         with col1:
             st.title("Kosten- und Strukturdaten der Krankenhäuser")
         with col2:
             # A missing logo must not take the whole page down: without this
             # guard Image.open raises and the outer except aborts rendering.
             logo_path = 'BinDoc Logo (Quadratisch).png'
             if os.path.exists(logo_path):
                 st.image(Image.open(logo_path), use_column_width='always')
         # Check the same file that is loaded below (the check previously
         # tested pdf_path2 while pdf_path3 was the file actually used).
         if not os.path.exists(pdf_path3):
             st.error("File not found. Please check the file path.")
             return
         # NOTE(review): force_reload=True presumably rebuilds the vector store
         # on every Streamlit rerun, i.e. on every interaction -- confirm this
         # is intended, since it repeats the embedding work each time.
         vector_store = load_vector_store(pdf_path3, "Kosten_Str_2301", force_reload=True)
         display_chat_history(st.session_state['chat_history_page3'])
         st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
         st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
         st.write("<!-- End Spacer -->", unsafe_allow_html=True)
         # Reserved slot above the input where the newest exchange is rendered.
         new_messages_placeholder = st.empty()
         query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
         add_vertical_space(2)  # Adjust as per the desired spacing
         # Preset questions; a clicked button overrides the text input.
         col1, col2 = st.columns(2)
         with col1:
             if st.button("Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"):
                 query = "Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"
             if st.button("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?"):
                 query = ("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?")
             if st.button("Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern?"):
                 query = "Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern? "
         with col2:
             if st.button("Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"):
                 query = "Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"
             if st.button("Welche Sachkosten werden in Krankenhäusern unterschieden?"):
                 query = "Welche Sachkosten werden in Krankenhäusern unterschieden? "
             if st.button("Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"):
                 query = "Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"
         if query:
             full_query = ask_bot(query)
             st.session_state['chat_history_page3'].append(("User", query, "new"))
             # Time the retrieval + answer generation only.
             start_time = time.time()
             # Placeholder so the response time can be filled in after the messages.
             response_time_placeholder = st.empty()
             with st.spinner('Eve denkt über Ihre Frage nach...'):
                 chain = load_chatbot()
                 docs = vector_store.similarity_search(query=query, k=5)
                 with get_openai_callback():
                     response = chain.run(input_documents=docs, question=full_query)
                     response = handle_no_answer(response)  # Post-process the raw answer
             duration = time.time() - start_time
             st.session_state['chat_history_page3'].append(("Eve", response, "new"))
             # Persist the chat histories of all pages under this session id.
             all_chat_histories = [
                 st.session_state['chat_history_page1'],
                 st.session_state['chat_history_page2'],
                 st.session_state['chat_history_page3']
             ]
             save_conversation(all_chat_histories, st.session_state['session_id'])
             # st.empty() holds a single element, so writing the two messages
             # one after another would leave only the last one visible. Build
             # both bubbles first and render them in one call. The text is
             # escaped because it is injected into raw HTML.
             message_style = "background-color: #ffeecf; padding: 10px; border-radius: 10px; margin: 10px;"
             rendered = [
                 f"<div style='{message_style}'>"
                 f"{html.escape(str(sender), quote=False)}: "
                 f"{html.escape(str(text), quote=False)}</div>"
                 for sender, text, _ in st.session_state['chat_history_page3'][-2:]
             ]
             new_messages_placeholder.markdown("\n".join(rendered), unsafe_allow_html=True)
             # Update the response time placeholder after the messages are displayed
             response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
         # Mark all messages as old after displaying
         st.session_state['chat_history_page3'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page3']]
     except Exception as e:
         st.error(f"Upsi, an unexpected error occurred: {e}")
         # Optionally log the exception details to a file or error tracking service
 def page4():
     try:
 if __name__ == "__main__":
+    main()