Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
from sentence_transformers import SentenceTransformer
|
3 |
-
import numpy as np
|
4 |
from PIL import Image
|
5 |
-
import pinecone
|
6 |
-
import pinecone_text
|
7 |
import random
|
8 |
import time
|
9 |
from dotenv import load_dotenv
|
@@ -20,6 +16,8 @@ from langchain.callbacks import get_openai_callback
|
|
20 |
import os
|
21 |
import uuid
|
22 |
import json
|
|
|
|
|
23 |
import pandas as pd
|
24 |
import pydeck as pdk
|
25 |
from urllib.error import URLError
|
@@ -38,17 +36,6 @@ if 'chat_history_page3' not in st.session_state:
|
|
38 |
if 'session_id' not in st.session_state:
|
39 |
st.session_state['session_id'] = str(uuid.uuid4())
|
40 |
|
41 |
-
from pinecone import Pinecone
|
42 |
-
|
43 |
-
# Initialize Pinecone client
|
44 |
-
api_key = os.environ["api_key"]
|
45 |
-
|
46 |
-
# configure client
|
47 |
-
pc = Pinecone(api_key=api_key)
|
48 |
-
|
49 |
-
index_name = "canopy--document-uploader" # Replace with your chosen index name
|
50 |
-
|
51 |
-
index = pc.Index(name=index_name)
|
52 |
|
53 |
|
54 |
# Step 1: Clone the Dataset Repository
|
@@ -95,7 +82,7 @@ def load_vector_store(file_path, store_name, force_reload=False):
|
|
95 |
#st.text(f"Loaded existing vector store from {vector_store_path}")
|
96 |
else:
|
97 |
# Load and process the PDF, then create the vector store
|
98 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=
|
99 |
text = load_pdf_text(file_path)
|
100 |
chunks = text_splitter.split_text(text=text)
|
101 |
embeddings = OpenAIEmbeddings()
|
@@ -248,23 +235,6 @@ def display_session_id():
|
|
248 |
st.sidebar.markdown(f"**Ihre Session ID:** `{session_id}`")
|
249 |
st.sidebar.markdown("Verwenden Sie diese ID als Referenz bei Mitteilungen oder Rückmeldungen.")
|
250 |
|
251 |
-
|
252 |
-
def query_pinecone(vector, index, top_k=5):
|
253 |
-
# Query Pinecone index for similar vectors
|
254 |
-
query_results = index.query(vector=vector, top_k=top_k)
|
255 |
-
return query_results["matches"]
|
256 |
-
|
257 |
-
from sentence_transformers import SentenceTransformer
|
258 |
-
|
259 |
-
# Initialize the Sentence Transformer model
|
260 |
-
model = SentenceTransformer('all-MiniLM-L6-v2')
|
261 |
-
|
262 |
-
def text_to_vector(text):
|
263 |
-
# Convert input text to vector
|
264 |
-
embedding = model.encode(text)
|
265 |
-
return embedding # No need to convert to list, depending on how you use it later
|
266 |
-
|
267 |
-
|
268 |
|
269 |
def page1():
|
270 |
try:
|
@@ -510,52 +480,125 @@ def page2():
|
|
510 |
|
511 |
def page3():
|
512 |
try:
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
|
|
519 |
|
520 |
-
|
|
|
|
|
521 |
with col1:
|
522 |
st.title("Kosten- und Strukturdaten der Krankenhäuser")
|
|
|
523 |
with col2:
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
|
529 |
-
|
530 |
|
531 |
query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
|
532 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
if query:
|
534 |
-
|
535 |
-
|
536 |
-
|
|
|
|
|
|
|
|
|
|
|
537 |
|
538 |
-
|
539 |
-
|
|
|
|
|
|
|
|
|
540 |
|
541 |
-
# Process and display results
|
542 |
-
for result in results['matches']:
|
543 |
-
matched_id = result['id']
|
544 |
-
score = result['score']
|
545 |
-
# Assuming you have metadata to display, adjust as necessary
|
546 |
-
st.write(f"Matched ID: {matched_id}, Score: {score}")
|
547 |
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
|
|
|
|
552 |
|
553 |
-
|
554 |
-
save_conversation(st.session_state['chat_history_page3'], st.session_state['session_id'])
|
555 |
|
556 |
-
|
557 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
558 |
|
|
|
|
|
|
|
559 |
|
560 |
def page4():
|
561 |
try:
|
@@ -618,4 +661,4 @@ def main():
|
|
618 |
|
619 |
|
620 |
if __name__ == "__main__":
|
621 |
-
main()
|
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
from PIL import Image
|
|
|
|
|
3 |
import random
|
4 |
import time
|
5 |
from dotenv import load_dotenv
|
|
|
16 |
import os
|
17 |
import uuid
|
18 |
import json
|
19 |
+
|
20 |
+
|
21 |
import pandas as pd
|
22 |
import pydeck as pdk
|
23 |
from urllib.error import URLError
|
|
|
36 |
if 'session_id' not in st.session_state:
|
37 |
st.session_state['session_id'] = str(uuid.uuid4())
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
# Step 1: Clone the Dataset Repository
|
|
|
82 |
#st.text(f"Loaded existing vector store from {vector_store_path}")
|
83 |
else:
|
84 |
# Load and process the PDF, then create the vector store
|
85 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100, length_function=len)
|
86 |
text = load_pdf_text(file_path)
|
87 |
chunks = text_splitter.split_text(text=text)
|
88 |
embeddings = OpenAIEmbeddings()
|
|
|
235 |
st.sidebar.markdown(f"**Ihre Session ID:** `{session_id}`")
|
236 |
st.sidebar.markdown("Verwenden Sie diese ID als Referenz bei Mitteilungen oder Rückmeldungen.")
|
237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
|
239 |
def page1():
|
240 |
try:
|
|
|
480 |
|
481 |
def page3():
|
482 |
try:
|
483 |
+
hide_streamlit_style = """
|
484 |
+
<style>
|
485 |
+
#MainMenu {visibility: hidden;}
|
486 |
+
footer {visibility: hidden;}
|
487 |
+
</style>
|
488 |
+
"""
|
489 |
+
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
490 |
|
491 |
+
# Create columns for layout
|
492 |
+
col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
|
493 |
+
|
494 |
with col1:
|
495 |
st.title("Kosten- und Strukturdaten der Krankenhäuser")
|
496 |
+
|
497 |
with col2:
|
498 |
+
# Load and display the image in the right column, which will be the top-right corner of the page
|
499 |
+
image = Image.open('BinDoc Logo (Quadratisch).png')
|
500 |
+
st.image(image, use_column_width='always')
|
501 |
+
|
502 |
+
|
503 |
+
if not os.path.exists(pdf_path2):
|
504 |
+
st.error("File not found. Please check the file path.")
|
505 |
+
return
|
506 |
+
|
507 |
+
VectorStore = load_vector_store(pdf_path3, "Kosten_Str_2301", force_reload=True)
|
508 |
+
|
509 |
+
|
510 |
+
|
511 |
+
display_chat_history(st.session_state['chat_history_page3'])
|
512 |
+
|
513 |
+
st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
|
514 |
+
st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
|
515 |
+
st.write("<!-- End Spacer -->", unsafe_allow_html=True)
|
516 |
|
517 |
+
new_messages_placeholder = st.empty()
|
518 |
|
519 |
query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
|
520 |
|
521 |
+
add_vertical_space(2) # Adjust as per the desired spacing
|
522 |
+
|
523 |
+
# Create two columns for the buttons
|
524 |
+
col1, col2 = st.columns(2)
|
525 |
+
|
526 |
+
with col1:
|
527 |
+
if st.button("Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"):
|
528 |
+
query = "Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"
|
529 |
+
if st.button("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?"):
|
530 |
+
query = ("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?")
|
531 |
+
if st.button("Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern?"):
|
532 |
+
query = "Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern? "
|
533 |
+
|
534 |
+
|
535 |
+
with col2:
|
536 |
+
if st.button("Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"):
|
537 |
+
query = "Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"
|
538 |
+
if st.button("Welche Sachkosten werden in Krankenhäusern unterschieden?"):
|
539 |
+
query = "Welche Sachkosten werden in Krankenhäusern unterschieden? "
|
540 |
+
if st.button("Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"):
|
541 |
+
query = "Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"
|
542 |
+
|
543 |
+
|
544 |
+
|
545 |
if query:
|
546 |
+
full_query = ask_bot(query)
|
547 |
+
st.session_state['chat_history_page3'].append(("User", query, "new"))
|
548 |
+
|
549 |
+
# Start timing
|
550 |
+
start_time = time.time()
|
551 |
+
|
552 |
+
# Create a placeholder for the response time
|
553 |
+
response_time_placeholder = st.empty()
|
554 |
|
555 |
+
with st.spinner('Eve denkt über Ihre Frage nach...'):
|
556 |
+
chain = load_chatbot()
|
557 |
+
docs = VectorStore.similarity_search(query=query, k=5)
|
558 |
+
with get_openai_callback() as cb:
|
559 |
+
response = chain.run(input_documents=docs, question=full_query)
|
560 |
+
response = handle_no_answer(response) # Process the response through the new function
|
561 |
|
|
|
|
|
|
|
|
|
|
|
|
|
562 |
|
563 |
+
|
564 |
+
# Stop timing
|
565 |
+
end_time = time.time()
|
566 |
+
|
567 |
+
# Calculate duration
|
568 |
+
duration = end_time - start_time
|
569 |
|
570 |
+
st.session_state['chat_history_page3'].append(("Eve", response, "new"))
|
|
|
571 |
|
572 |
+
# Combine chat histories from all pages
|
573 |
+
all_chat_histories = [
|
574 |
+
st.session_state['chat_history_page1'],
|
575 |
+
st.session_state['chat_history_page2'],
|
576 |
+
st.session_state['chat_history_page3']
|
577 |
+
]
|
578 |
+
|
579 |
+
# Save the combined chat histories
|
580 |
+
save_conversation(all_chat_histories, st.session_state['session_id'])
|
581 |
+
|
582 |
+
|
583 |
+
# Display new messages at the bottom
|
584 |
+
new_messages = st.session_state['chat_history_page3'][-2:]
|
585 |
+
for chat in new_messages:
|
586 |
+
background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
|
587 |
+
new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
|
588 |
+
|
589 |
+
# Update the response time placeholder after the messages are displayed
|
590 |
+
response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
|
591 |
+
|
592 |
+
|
593 |
+
# Clear the input field after the query is made
|
594 |
+
query = ""
|
595 |
+
|
596 |
+
# Mark all messages as old after displaying
|
597 |
+
st.session_state['chat_history_page3'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page3']]
|
598 |
|
599 |
+
except Exception as e:
|
600 |
+
st.error(f"Upsi, an unexpected error occurred: {e}")
|
601 |
+
# Optionally log the exception details to a file or error tracking service
|
602 |
|
603 |
def page4():
|
604 |
try:
|
|
|
661 |
|
662 |
|
663 |
if __name__ == "__main__":
|
664 |
+
main()
|