Anne31415 committed on
Commit
9a6164d
1 Parent(s): fe37ee0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -65
app.py CHANGED
@@ -1,9 +1,5 @@
1
  import streamlit as st
2
- from sentence_transformers import SentenceTransformer
3
- import numpy as np
4
  from PIL import Image
5
- import pinecone
6
- import pinecone_text
7
  import random
8
  import time
9
  from dotenv import load_dotenv
@@ -20,6 +16,8 @@ from langchain.callbacks import get_openai_callback
20
  import os
21
  import uuid
22
  import json
 
 
23
  import pandas as pd
24
  import pydeck as pdk
25
  from urllib.error import URLError
@@ -38,17 +36,6 @@ if 'chat_history_page3' not in st.session_state:
38
  if 'session_id' not in st.session_state:
39
  st.session_state['session_id'] = str(uuid.uuid4())
40
 
41
- from pinecone import Pinecone
42
-
43
- # Initialize Pinecone client
44
- api_key = os.environ["api_key"]
45
-
46
- # configure client
47
- pc = Pinecone(api_key=api_key)
48
-
49
- index_name = "canopy--document-uploader" # Replace with your chosen index name
50
-
51
- index = pc.Index(name=index_name)
52
 
53
 
54
  # Step 1: Clone the Dataset Repository
@@ -95,7 +82,7 @@ def load_vector_store(file_path, store_name, force_reload=False):
95
  #st.text(f"Loaded existing vector store from {vector_store_path}")
96
  else:
97
  # Load and process the PDF, then create the vector store
98
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200, length_function=len)
99
  text = load_pdf_text(file_path)
100
  chunks = text_splitter.split_text(text=text)
101
  embeddings = OpenAIEmbeddings()
@@ -248,23 +235,6 @@ def display_session_id():
248
  st.sidebar.markdown(f"**Ihre Session ID:** `{session_id}`")
249
  st.sidebar.markdown("Verwenden Sie diese ID als Referenz bei Mitteilungen oder Rückmeldungen.")
250
 
251
-
252
- def query_pinecone(vector, index, top_k=5):
253
- # Query Pinecone index for similar vectors
254
- query_results = index.query(vector=vector, top_k=top_k)
255
- return query_results["matches"]
256
-
257
- from sentence_transformers import SentenceTransformer
258
-
259
- # Initialize the Sentence Transformer model
260
- model = SentenceTransformer('all-MiniLM-L6-v2')
261
-
262
- def text_to_vector(text):
263
- # Convert input text to vector
264
- embedding = model.encode(text)
265
- return embedding # No need to convert to list, depending on how you use it later
266
-
267
-
268
 
269
  def page1():
270
  try:
@@ -510,52 +480,125 @@ def page2():
510
 
511
  def page3():
512
  try:
513
- st.markdown("""
514
- <style>
515
- #MainMenu {visibility: hidden;}
516
- footer {visibility: hidden;}
517
- </style>
518
- """, unsafe_allow_html=True)
 
519
 
520
- col1, col2 = st.columns([3, 1])
 
 
521
  with col1:
522
  st.title("Kosten- und Strukturdaten der Krankenhäuser")
 
523
  with col2:
524
- image_path = 'BinDoc Logo (Quadratisch).png'
525
- if os.path.exists(image_path):
526
- image = Image.open(image_path)
527
- st.image(image, use_column_width='always')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
 
529
- display_chat_history(st.session_state.get('chat_history_page3', []))
530
 
531
  query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  if query:
534
- query_vector = text_to_vector(query)
535
- # Ensure the vector is in the correct format for Pinecone
536
- query_vector = query_vector.tolist() if isinstance(query_vector, np.ndarray) else query_vector
 
 
 
 
 
537
 
538
- # Query the Pinecone index
539
- results = pc.query(index=index_name, vector=query_vector, top_k=5)
 
 
 
 
540
 
541
- # Process and display results
542
- for result in results['matches']:
543
- matched_id = result['id']
544
- score = result['score']
545
- # Assuming you have metadata to display, adjust as necessary
546
- st.write(f"Matched ID: {matched_id}, Score: {score}")
547
 
548
- # Update chat history
549
- st.session_state['chat_history_page3'].append(("User", query))
550
- for result in results['matches']:
551
- st.session_state['chat_history_page3'].append(("Result", f"Matched ID: {result['id']}, Score: {result['score']}"))
 
 
552
 
553
- # Save the updated chat history
554
- save_conversation(st.session_state['chat_history_page3'], st.session_state['session_id'])
555
 
556
- except Exception as e:
557
- st.error(f"An unexpected error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
 
 
 
 
559
 
560
  def page4():
561
  try:
@@ -618,4 +661,4 @@ def main():
618
 
619
 
620
  if __name__ == "__main__":
621
- main()
 
1
  import streamlit as st
 
 
2
  from PIL import Image
 
 
3
  import random
4
  import time
5
  from dotenv import load_dotenv
 
16
  import os
17
  import uuid
18
  import json
19
+
20
+
21
  import pandas as pd
22
  import pydeck as pdk
23
  from urllib.error import URLError
 
36
  if 'session_id' not in st.session_state:
37
  st.session_state['session_id'] = str(uuid.uuid4())
38
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  # Step 1: Clone the Dataset Repository
 
82
  #st.text(f"Loaded existing vector store from {vector_store_path}")
83
  else:
84
  # Load and process the PDF, then create the vector store
85
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100, length_function=len)
86
  text = load_pdf_text(file_path)
87
  chunks = text_splitter.split_text(text=text)
88
  embeddings = OpenAIEmbeddings()
 
235
  st.sidebar.markdown(f"**Ihre Session ID:** `{session_id}`")
236
  st.sidebar.markdown("Verwenden Sie diese ID als Referenz bei Mitteilungen oder Rückmeldungen.")
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  def page1():
240
  try:
 
480
 
481
  def page3():
482
  try:
483
+ hide_streamlit_style = """
484
+ <style>
485
+ #MainMenu {visibility: hidden;}
486
+ footer {visibility: hidden;}
487
+ </style>
488
+ """
489
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
490
 
491
+ # Create columns for layout
492
+ col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
493
+
494
  with col1:
495
  st.title("Kosten- und Strukturdaten der Krankenhäuser")
496
+
497
  with col2:
498
+ # Load and display the image in the right column, which will be the top-right corner of the page
499
+ image = Image.open('BinDoc Logo (Quadratisch).png')
500
+ st.image(image, use_column_width='always')
501
+
502
+
503
+ if not os.path.exists(pdf_path2):
504
+ st.error("File not found. Please check the file path.")
505
+ return
506
+
507
+ VectorStore = load_vector_store(pdf_path3, "Kosten_Str_2301", force_reload=True)
508
+
509
+
510
+
511
+ display_chat_history(st.session_state['chat_history_page3'])
512
+
513
+ st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
514
+ st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
515
+ st.write("<!-- End Spacer -->", unsafe_allow_html=True)
516
 
517
+ new_messages_placeholder = st.empty()
518
 
519
  query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
520
 
521
+ add_vertical_space(2) # Adjust as per the desired spacing
522
+
523
+ # Create two columns for the buttons
524
+ col1, col2 = st.columns(2)
525
+
526
+ with col1:
527
+ if st.button("Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"):
528
+ query = "Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"
529
+ if st.button("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?"):
530
+ query = ("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?")
531
+ if st.button("Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern?"):
532
+ query = "Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern? "
533
+
534
+
535
+ with col2:
536
+ if st.button("Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"):
537
+ query = "Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"
538
+ if st.button("Welche Sachkosten werden in Krankenhäusern unterschieden?"):
539
+ query = "Welche Sachkosten werden in Krankenhäusern unterschieden? "
540
+ if st.button("Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"):
541
+ query = "Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"
542
+
543
+
544
+
545
  if query:
546
+ full_query = ask_bot(query)
547
+ st.session_state['chat_history_page3'].append(("User", query, "new"))
548
+
549
+ # Start timing
550
+ start_time = time.time()
551
+
552
+ # Create a placeholder for the response time
553
+ response_time_placeholder = st.empty()
554
 
555
+ with st.spinner('Eve denkt über Ihre Frage nach...'):
556
+ chain = load_chatbot()
557
+ docs = VectorStore.similarity_search(query=query, k=5)
558
+ with get_openai_callback() as cb:
559
+ response = chain.run(input_documents=docs, question=full_query)
560
+ response = handle_no_answer(response) # Process the response through the new function
561
 
 
 
 
 
 
 
562
 
563
+
564
+ # Stop timing
565
+ end_time = time.time()
566
+
567
+ # Calculate duration
568
+ duration = end_time - start_time
569
 
570
+ st.session_state['chat_history_page3'].append(("Eve", response, "new"))
 
571
 
572
+ # Combine chat histories from all pages
573
+ all_chat_histories = [
574
+ st.session_state['chat_history_page1'],
575
+ st.session_state['chat_history_page2'],
576
+ st.session_state['chat_history_page3']
577
+ ]
578
+
579
+ # Save the combined chat histories
580
+ save_conversation(all_chat_histories, st.session_state['session_id'])
581
+
582
+
583
+ # Display new messages at the bottom
584
+ new_messages = st.session_state['chat_history_page3'][-2:]
585
+ for chat in new_messages:
586
+ background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
587
+ new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
588
+
589
+ # Update the response time placeholder after the messages are displayed
590
+ response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
591
+
592
+
593
+ # Clear the input field after the query is made
594
+ query = ""
595
+
596
+ # Mark all messages as old after displaying
597
+ st.session_state['chat_history_page3'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page3']]
598
 
599
+ except Exception as e:
600
+ st.error(f"Upsi, an unexpected error occurred: {e}")
601
+ # Optionally log the exception details to a file or error tracking service
602
 
603
  def page4():
604
  try:
 
661
 
662
 
663
  if __name__ == "__main__":
664
+ main()