Anne31415 committed on
Commit
4a2f6f3
1 Parent(s): 9a6164d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -42
app.py CHANGED
@@ -16,12 +16,14 @@ from langchain.callbacks import get_openai_callback
16
  import os
17
  import uuid
18
  import json
19
-
20
-
21
  import pandas as pd
22
  import pydeck as pdk
23
  from urllib.error import URLError
24
 
 
 
 
 
25
  # Initialize session state variables
26
  if 'chat_history_page1' not in st.session_state:
27
  st.session_state['chat_history_page1'] = []
@@ -59,6 +61,8 @@ repo.git_pull() # Pull the latest changes (if any)
59
 
60
 
61
  # Step 2: Load the PDF File
 
 
62
  pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
63
 
64
  pdf_path2 = "Private_Book/Buch_23012024.pdf"
@@ -68,6 +72,32 @@ pdf_path3 = "Private_Book/Kosten_Strukturdaten_RAG_vorbereited.pdf"
68
  api_key = os.getenv("OPENAI_API_KEY")
69
  # Retrieve the API key from st.secrets
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  @st.cache_resource
@@ -115,6 +145,8 @@ def load_vector_store(file_path, store_name, force_reload=False):
115
  return VectorStore
116
 
117
 
 
 
118
  # Utility function to load text from a PDF
119
  def load_pdf_text(file_path):
120
  pdf_reader = PdfReader(file_path)
@@ -478,6 +510,8 @@ def page2():
478
 
479
 
480
 
 
 
481
  def page3():
482
  try:
483
  hide_streamlit_style = """
@@ -488,7 +522,7 @@ def page3():
488
  """
489
  st.markdown(hide_streamlit_style, unsafe_allow_html=True)
490
 
491
- # Create columns for layout
492
  col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
493
 
494
  with col1:
@@ -499,14 +533,13 @@ def page3():
499
  image = Image.open('BinDoc Logo (Quadratisch).png')
500
  st.image(image, use_column_width='always')
501
 
502
-
503
- if not os.path.exists(pdf_path2):
504
  st.error("File not found. Please check the file path.")
505
  return
506
 
507
- VectorStore = load_vector_store(pdf_path3, "Kosten_Str_2301", force_reload=True)
508
-
509
-
510
 
511
  display_chat_history(st.session_state['chat_history_page3'])
512
 
@@ -524,51 +557,42 @@ def page3():
524
  col1, col2 = st.columns(2)
525
 
526
  with col1:
527
- if st.button("Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"):
528
- query = "Wie hat sich die Bettenanzahl in den letzten 10 Jahren entwickelt?"
529
- if st.button("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?"):
530
- query = ("Wie viele Patienten wurden im Jahr 2017 vollstationär behandelt?")
531
- if st.button("Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern?"):
532
- query = "Wie viele Vollkräfte arbeiten in Summe 2021 in deutschen Krankenhäusern? "
533
-
534
 
535
  with col2:
536
- if st.button("Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"):
537
- query = "Welche unterschiedlichen Personalkosten gibt es im Krankenhaus?"
538
- if st.button("Welche Sachkosten werden in Krankenhäusern unterschieden?"):
539
- query = "Welche Sachkosten werden in Krankenhäusern unterschieden? "
540
- if st.button("Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"):
541
- query = "Wie hoch sind die Gesamtkosten der Krankenhäuser pro Jahr: 2019, 2020, 2021?"
542
 
543
-
544
 
 
545
  if query:
546
  full_query = ask_bot(query)
547
  st.session_state['chat_history_page3'].append(("User", query, "new"))
548
-
549
- # Start timing
550
  start_time = time.time()
551
-
552
- # Create a placeholder for the response time
553
- response_time_placeholder = st.empty()
554
-
555
- with st.spinner('Eve denkt über Ihre Frage nach...'):
556
- chain = load_chatbot()
557
- docs = VectorStore.similarity_search(query=query, k=5)
558
- with get_openai_callback() as cb:
559
- response = chain.run(input_documents=docs, question=full_query)
560
- response = handle_no_answer(response) # Process the response through the new function
561
-
562
-
563
 
564
- # Stop timing
565
  end_time = time.time()
566
-
567
- # Calculate duration
568
  duration = end_time - start_time
569
-
 
 
 
 
 
 
 
570
  st.session_state['chat_history_page3'].append(("Eve", response, "new"))
571
 
 
572
  # Combine chat histories from all pages
573
  all_chat_histories = [
574
  st.session_state['chat_history_page1'],
@@ -579,7 +603,6 @@ def page3():
579
  # Save the combined chat histories
580
  save_conversation(all_chat_histories, st.session_state['session_id'])
581
 
582
-
583
  # Display new messages at the bottom
584
  new_messages = st.session_state['chat_history_page3'][-2:]
585
  for chat in new_messages:
@@ -589,7 +612,6 @@ def page3():
589
  # Update the response time placeholder after the messages are displayed
590
  response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
591
 
592
-
593
  # Clear the input field after the query is made
594
  query = ""
595
 
@@ -600,6 +622,9 @@ def page3():
600
  st.error(f"Upsi, an unexpected error occurred: {e}")
601
  # Optionally log the exception details to a file or error tracking service
602
 
 
 
 
603
  def page4():
604
  try:
605
  st.header(":mailbox: Kontakt & Feedback!")
 
16
  import os
17
  import uuid
18
  import json
 
 
19
  import pandas as pd
20
  import pydeck as pdk
21
  from urllib.error import URLError
22
 
23
+ import chromadb
24
# Create the in-memory Chroma client. It is bound to `chroma_client` because
# that is the name the next statement (and the later Chroma code in this file)
# uses; the original bound it only to `client`, so the following line raised
# NameError as soon as the script ran.
chroma_client = chromadb.Client()
client = chroma_client  # original name kept so any existing reference still works

# get_or_create_collection() instead of create_collection(): create_collection()
# raises when a collection with this name already exists.
# NOTE(review): Streamlit re-executes this script on every interaction, so this
# statement runs repeatedly in one process — confirm how the installed chromadb
# version keeps in-memory collections between reruns.
collection = chroma_client.get_or_create_collection(name="Kosten_Strukturdaten")
26
+
27
  # Initialize session state variables
28
  if 'chat_history_page1' not in st.session_state:
29
  st.session_state['chat_history_page1'] = []
 
61
 
62
 
63
  # Step 2: Load the PDF File
64
+
65
+
66
  pdf_path = "Private_Book/KH_Reform230124.pdf" # Replace with your PDF file path
67
 
68
  pdf_path2 = "Private_Book/Buch_23012024.pdf"
 
72
  api_key = os.getenv("OPENAI_API_KEY")
73
  # Retrieve the API key from st.secrets
74
 
75
+ import chromadb
76
+
77
# Module-level Chroma client, bound to `chroma_client` (the name the Chroma
# code in this file refers to).
chroma_client = chromadb.Client()

# Collection that receives the text extracted from the PDF.
collection_name = "Kosten_Strukturdaten"
# get_or_create_collection() rather than create_collection(): the latter raises
# if a collection called "Kosten_Strukturdaten" already exists, which happens
# when this statement is reached more than once for the same client state
# (e.g. on a Streamlit rerun of the script).
# NOTE(review): confirm against the installed chromadb version.
collection = chroma_client.get_or_create_collection(name=collection_name)
83
+
84
+ # Function to extract text from a PDF file
85
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path`` as one string.

    The pages are read in order with ``PdfReader``. Each page's extracted text
    is followed by a single space, so the result ends with a trailing space
    whenever the PDF has at least one page, and is ``""`` when it has none.
    """
    document = PdfReader(pdf_path)
    # The separator is appended to each page's text (not placed between pages)
    # so the output matches page-by-page concatenation exactly.
    return "".join(page.extract_text() + " " for page in document.pages)
91
+
92
# Extract the full text of the cost/structure PDF once at module level.
pdf_text = extract_text_from_pdf(pdf_path3)

# Add the extracted text to the Chroma collection as a single document.
# The original `ids=["Kosten_Strukturdaten")]` had its closing bracket and
# parenthesis swapped, which is a SyntaxError and stopped the module loading.
# NOTE(review): the whole PDF is stored under one id, so a query can return at
# most this one document regardless of n_results — consider chunking.
collection.add(
    documents=[pdf_text],
    metadatas=[{"source": pdf_path3}],  # records which file the text came from
    ids=["Kosten_Strukturdaten"],
)
101
 
102
 
103
  @st.cache_resource
 
145
  return VectorStore
146
 
147
 
148
+
149
+
150
  # Utility function to load text from a PDF
151
  def load_pdf_text(file_path):
152
  pdf_reader = PdfReader(file_path)
 
510
 
511
 
512
 
513
+ # Correcting the indentation error and completing the Chroma database integration in page3()
514
+
515
  def page3():
516
  try:
517
  hide_streamlit_style = """
 
522
  """
523
  st.markdown(hide_streamlit_style, unsafe_allow_html=True)
524
 
525
+ # Create columns for layout
526
  col1, col2 = st.columns([3, 1]) # Adjust the ratio to your liking
527
 
528
  with col1:
 
533
  image = Image.open('BinDoc Logo (Quadratisch).png')
534
  st.image(image, use_column_width='always')
535
 
536
+ if not os.path.exists(pdf_path3):
 
537
  st.error("File not found. Please check the file path.")
538
  return
539
 
540
+ # Initialize Chroma client and collection
541
+ chroma_client = chromadb.Client()
542
+ collection = chroma_client.create_collection(name="Kosten_Strukturdaten")
543
 
544
  display_chat_history(st.session_state['chat_history_page3'])
545
 
 
557
  col1, col2 = st.columns(2)
558
 
559
  with col1:
560
+ if st.button("Test1"):
561
+ query = "Test1"
 
 
 
 
 
562
 
563
  with col2:
564
+ if st.button("Test2"):
565
+ query = "Test2"
 
 
 
 
566
 
 
567
 
568
+ # Handling query input
569
  if query:
570
  full_query = ask_bot(query)
571
  st.session_state['chat_history_page3'].append(("User", query, "new"))
572
+
573
+ # Start timing for response
574
  start_time = time.time()
575
+
576
+ # Querying the Chroma collection
577
+ results = collection.query(
578
+ query_texts=[full_query],
579
+ n_results=5 # Adjust the number of results as needed
580
+ )
 
 
 
 
 
 
581
 
582
+ # Calculate the response duration
583
  end_time = time.time()
 
 
584
  duration = end_time - start_time
585
+
586
+ # Process and display response from Chroma results
587
+ if results:
588
+ # TODO: Adjust the following logic based on Chroma's actual result structure
589
+ response = f"Top result: {results[0]['text']}" # Example response using the first result
590
+ else:
591
+ response = "No results found for your query."
592
+
593
  st.session_state['chat_history_page3'].append(("Eve", response, "new"))
594
 
595
+
596
  # Combine chat histories from all pages
597
  all_chat_histories = [
598
  st.session_state['chat_history_page1'],
 
603
  # Save the combined chat histories
604
  save_conversation(all_chat_histories, st.session_state['session_id'])
605
 
 
606
  # Display new messages at the bottom
607
  new_messages = st.session_state['chat_history_page3'][-2:]
608
  for chat in new_messages:
 
612
  # Update the response time placeholder after the messages are displayed
613
  response_time_placeholder.text(f"Response time: {duration:.2f} seconds")
614
 
 
615
  # Clear the input field after the query is made
616
  query = ""
617
 
 
622
  st.error(f"Upsi, an unexpected error occurred: {e}")
623
  # Optionally log the exception details to a file or error tracking service
624
 
625
+
626
+
627
+
628
  def page4():
629
  try:
630
  st.header(":mailbox: Kontakt & Feedback!")