Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,12 +16,14 @@ from langchain.callbacks import get_openai_callback
|
|
16 |
import os
|
17 |
import uuid
|
18 |
import json
|
19 |
-
|
20 |
-
|
21 |
import pandas as pd
|
22 |
import pydeck as pdk
|
23 |
from urllib.error import URLError
|
24 |
|
|
|
|
|
|
|
|
|
25 |
# Initialize session state variables
|
26 |
if 'chat_history_page1' not in st.session_state:
|
27 |
st.session_state['chat_history_page1'] = []
|
@@ -59,6 +61,8 @@ repo.git_pull() # Pull the latest changes (if any)
|
|
59 |
|
60 |
|
61 |
# Step 2: Load the PDF File
|
|
|
|
|
62 |
pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
|
63 |
|
64 |
pdf_path2 = "Private_Book/Buch_23012024.pdf"
|
@@ -68,6 +72,32 @@ pdf_path3 = "Private_Book/Kosten_Strukturdaten_RAG_vorbereited.pdf"
|
|
68 |
api_key = os.getenv("OPENAI_API_KEY")
|
69 |
# Retrieve the API key from st.secrets
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
|
73 |
@st.cache_resource
|
@@ -115,6 +145,8 @@ def load_vector_store(file_path, store_name, force_reload=False):
|
|
115 |
return VectorStore
|
116 |
|
117 |
|
|
|
|
|
118 |
# Utility function to load text from a PDF
|
119 |
def load_pdf_text(file_path):
|
120 |
pdf_reader = PdfReader(file_path)
|
@@ -478,6 +510,8 @@ def page2():
|
|
478 |
|
479 |
|
480 |
|
|
|
|
|
481 |
def page3():
|
482 |
try:
|
483 |
hide_streamlit_style = """
|
@@ -488,7 +522,7 @@ def page3():
|
|
488 |
"""
|
489 |
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
490 |
|
491 |
-
|
492 |
col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
|
493 |
|
494 |
with col1:
|
@@ -499,14 +533,13 @@ def page3():
|
|
499 |
image = Image.open('BinDoc Logo (Quadratisch).png')
|
500 |
st.image(image, use_column_width='always')
|
501 |
|
502 |
-
|
503 |
-
if not os.path.exists(pdf_path2):
|
504 |
st.error("File not found. Please check the file path.")
|
505 |
return
|
506 |
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
|
511 |
display_chat_history(st.session_state['chat_history_page3'])
|
512 |
|
@@ -524,51 +557,42 @@ def page3():
|
|
524 |
col1, col2 = st.columns(2)
|
525 |
|
526 |
with col1:
|
527 |
-
if st.button("
|
528 |
-
query = "
|
529 |
-
if st.button("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?"):
|
530 |
-
query = ("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?")
|
531 |
-
if st.button("Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern?"):
|
532 |
-
query = "Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern? "
|
533 |
-
|
534 |
|
535 |
with col2:
|
536 |
-
if st.button("
|
537 |
-
query = "
|
538 |
-
if st.button("Welche Sachkosten werden in Krankenhäusern unterschieden?"):
|
539 |
-
query = "Welche Sachkosten werden in Krankenhäusern unterschieden? "
|
540 |
-
if st.button("Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"):
|
541 |
-
query = "Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"
|
542 |
|
543 |
-
|
544 |
|
|
|
545 |
if query:
|
546 |
full_query = ask_bot(query)
|
547 |
st.session_state['chat_history_page3'].append(("User", query, "new"))
|
548 |
-
|
549 |
-
# Start timing
|
550 |
start_time = time.time()
|
551 |
-
|
552 |
-
#
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
docs = VectorStore.similarity_search(query=query, k=5)
|
558 |
-
with get_openai_callback() as cb:
|
559 |
-
response = chain.run(input_documents=docs, question=full_query)
|
560 |
-
response = handle_no_answer(response) # Process the response through the new function
|
561 |
-
|
562 |
-
|
563 |
|
564 |
-
#
|
565 |
end_time = time.time()
|
566 |
-
|
567 |
-
# Calculate duration
|
568 |
duration = end_time - start_time
|
569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
570 |
st.session_state['chat_history_page3'].append(("Eve", response, "new"))
|
571 |
|
|
|
572 |
# Combine chat histories from all pages
|
573 |
all_chat_histories = [
|
574 |
st.session_state['chat_history_page1'],
|
@@ -579,7 +603,6 @@ def page3():
|
|
579 |
# Save the combined chat histories
|
580 |
save_conversation(all_chat_histories, st.session_state['session_id'])
|
581 |
|
582 |
-
|
583 |
# Display new messages at the bottom
|
584 |
new_messages = st.session_state['chat_history_page3'][-2:]
|
585 |
for chat in new_messages:
|
@@ -589,7 +612,6 @@ def page3():
|
|
589 |
# Update the response time placeholder after the messages are displayed
|
590 |
response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
|
591 |
|
592 |
-
|
593 |
# Clear the input field after the query is made
|
594 |
query = ""
|
595 |
|
@@ -600,6 +622,9 @@ def page3():
|
|
600 |
st.error(f"Upsi, an unexpected error occurred: {e}")
|
601 |
# Optionally log the exception details to a file or error tracking service
|
602 |
|
|
|
|
|
|
|
603 |
def page4():
|
604 |
try:
|
605 |
st.header(":mailbox: Kontakt & Feedback!")
|
|
|
16 |
import os
|
17 |
import uuid
|
18 |
import json
|
|
|
|
|
19 |
import pandas as pd
|
20 |
import pydeck as pdk
|
21 |
from urllib.error import URLError
|
22 |
|
23 |
+
import chromadb
|
24 |
+
# Chroma client for the embedding store. The original bound only `client`
# but then called `chroma_client`, which raised NameError; both names are
# bound here so either spelling keeps working.
client = chroma_client = chromadb.Client()
# get_or_create_collection returns the existing collection instead of
# raising when "Kosten_Strukturdaten" has already been created.
collection = chroma_client.get_or_create_collection(name="Kosten_Strukturdaten")
|
26 |
+
|
27 |
# Initialize session state variables
|
28 |
if 'chat_history_page1' not in st.session_state:
|
29 |
st.session_state['chat_history_page1'] = []
|
|
|
61 |
|
62 |
|
63 |
# Step 2: Load the PDF File
|
64 |
+
|
65 |
+
|
66 |
pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
|
67 |
|
68 |
pdf_path2 = "Private_Book/Buch_23012024.pdf"
|
|
|
72 |
api_key = os.getenv("OPENAI_API_KEY")
|
73 |
# Retrieve the API key from st.secrets
|
74 |
|
75 |
+
import chromadb
|
76 |
+
|
77 |
+
# Module-level Chroma client used by the statements below.
chroma_client = chromadb.Client()

# Collection that holds the embedded PDF text. get_or_create_collection is
# used instead of create_collection because create_collection raises when a
# collection with this name already exists.
collection_name = "Kosten_Strukturdaten"
collection = chroma_client.get_or_create_collection(name=collection_name)
|
83 |
+
|
84 |
+
# Function to extract text from a PDF file
|
85 |
+
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Each page's extracted text is followed by a single space, so the result
    ends with a trailing space whenever the PDF has at least one page. An
    empty string is returned for a PDF without pages.
    """
    pages = PdfReader(pdf_path).pages
    return "".join(page.extract_text() + " " for page in pages)
|
91 |
+
|
92 |
+
# Example usage
|
93 |
+
pdf_text = extract_text_from_pdf(pdf_path3)

# Add the extracted PDF text to the Chroma collection as one document.
# documents, metadatas and ids are parallel lists with one entry each.
# The original `ids=["Kosten_Strukturdaten")]` had a stray ")" and did not parse.
collection.add(
    documents=[pdf_text],
    metadatas=[{"source": pdf_path3}],  # Add any relevant metadata for your document
    ids=["Kosten_Strukturdaten"],
)
|
101 |
|
102 |
|
103 |
@st.cache_resource
|
|
|
145 |
return VectorStore
|
146 |
|
147 |
|
148 |
+
|
149 |
+
|
150 |
# Utility function to load text from a PDF
|
151 |
def load_pdf_text(file_path):
|
152 |
pdf_reader = PdfReader(file_path)
|
|
|
510 |
|
511 |
|
512 |
|
513 |
+
# Correcting the indentation error and completing the Chroma database integration in page3()
|
514 |
+
|
515 |
def page3():
|
516 |
try:
|
517 |
hide_streamlit_style = """
|
|
|
522 |
"""
|
523 |
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
524 |
|
525 |
+
# Create columns for layout
|
526 |
col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
|
527 |
|
528 |
with col1:
|
|
|
533 |
image = Image.open('BinDoc Logo (Quadratisch).png')
|
534 |
st.image(image, use_column_width='always')
|
535 |
|
536 |
+
if not os.path.exists(pdf_path3):
|
|
|
537 |
st.error("File not found. Please check the file path.")
|
538 |
return
|
539 |
|
540 |
+
# Initialize Chroma client and collection
|
541 |
+
chroma_client = chromadb.Client()
|
542 |
+
collection = chroma_client.create_collection(name="Kosten_Strukturdaten")
|
543 |
|
544 |
display_chat_history(st.session_state['chat_history_page3'])
|
545 |
|
|
|
557 |
col1, col2 = st.columns(2)
|
558 |
|
559 |
with col1:
|
560 |
+
if st.button("Test1"):
|
561 |
+
query = "Test1"
|
|
|
|
|
|
|
|
|
|
|
562 |
|
563 |
with col2:
|
564 |
+
if st.button("Test2"):
|
565 |
+
query = "Test2"
|
|
|
|
|
|
|
|
|
566 |
|
|
|
567 |
|
568 |
+
# Handling query input
|
569 |
if query:
|
570 |
full_query = ask_bot(query)
|
571 |
st.session_state['chat_history_page3'].append(("User", query, "new"))
|
572 |
+
|
573 |
+
# Start timing for response
|
574 |
start_time = time.time()
|
575 |
+
|
576 |
+
# Querying the Chroma collection
|
577 |
+
results = collection.query(
|
578 |
+
query_texts=[full_query],
|
579 |
+
n_results=5 # Adjust the number of results as needed
|
580 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
|
582 |
+
# Calculate the response duration
|
583 |
end_time = time.time()
|
|
|
|
|
584 |
duration = end_time - start_time
|
585 |
+
|
586 |
+
# Process and display response from Chroma results
|
587 |
+
if results:
|
588 |
+
# TODO: Adjust the following logic based on Chroma's actual result structure
|
589 |
+
response = f"Top result: {results[0]['text']}" # Example response using the first result
|
590 |
+
else:
|
591 |
+
response = "No results found for your query."
|
592 |
+
|
593 |
st.session_state['chat_history_page3'].append(("Eve", response, "new"))
|
594 |
|
595 |
+
|
596 |
# Combine chat histories from all pages
|
597 |
all_chat_histories = [
|
598 |
st.session_state['chat_history_page1'],
|
|
|
603 |
# Save the combined chat histories
|
604 |
save_conversation(all_chat_histories, st.session_state['session_id'])
|
605 |
|
|
|
606 |
# Display new messages at the bottom
|
607 |
new_messages = st.session_state['chat_history_page3'][-2:]
|
608 |
for chat in new_messages:
|
|
|
612 |
# Update the response time placeholder after the messages are displayed
|
613 |
response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
|
614 |
|
|
|
615 |
# Clear the input field after the query is made
|
616 |
query = ""
|
617 |
|
|
|
622 |
st.error(f"Upsi, an unexpected error occurred: {e}")
|
623 |
# Optionally log the exception details to a file or error tracking service
|
624 |
|
625 |
+
|
626 |
+
|
627 |
+
|
628 |
def page4():
|
629 |
try:
|
630 |
st.header(":mailbox: Kontakt & Feedback!")
|