Anne31415 committed on
Commit
72a2744
1 Parent(s): 5665da9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -113
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import streamlit as st
2
  import time
 
3
  from dotenv import load_dotenv
4
  import pickle
5
  from huggingface_hub import Repository
@@ -17,15 +18,13 @@ import os
17
  repo = Repository(
18
  local_dir="Private_Book", # Local directory to clone the repository
19
  repo_type="dataset", # Specify that this is a dataset repository
20
-
21
  clone_from="Anne31415/Private_Book", # Replace with your repository URL
22
-
23
  token=os.environ["HUB_TOKEN"] # Use the secret token to authenticate
24
  )
25
  repo.git_pull() # Pull the latest changes (if any)
26
 
27
  # Step 2: Load the PDF File
28
- pdf_file_path = "Private_Book/KOMBI_all2.pdf" # Replace with your PDF file path
29
 
30
  with st.sidebar:
31
  st.title('BinDoc GmbH')
@@ -50,136 +49,140 @@ with st.sidebar:
50
  api_key = os.getenv("OPENAI_API_KEY")
51
  # Retrieve the API key from st.secrets
52
 
53
-
54
- def load_pdf(file_path):
55
- pdf_reader = PdfReader(file_path)
56
- text = ""
57
- for page in pdf_reader.pages:
58
- text += page.extract_text()
59
-
60
- text_splitter = RecursiveCharacterTextSplitter(
61
- chunk_size=1000,
62
- chunk_overlap=200,
63
- length_function=len
64
- )
65
- chunks = text_splitter.split_text(text=text)
66
-
67
- store_name, _ = os.path.splitext(os.path.basename(file_path))
68
-
69
- if os.path.exists(f"{store_name}.pkl"):
70
- with open(f"{store_name}.pkl", "rb") as f:
71
- VectorStore = pickle.load(f)
72
- else:
73
  embeddings = OpenAIEmbeddings()
74
  VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
75
  with open(f"{store_name}.pkl", "wb") as f:
76
  pickle.dump(VectorStore, f)
 
 
 
77
 
78
  return VectorStore
79
 
80
-
 
 
 
 
 
 
81
 
82
  def load_chatbot():
83
  return load_qa_chain(llm=OpenAI(), chain_type="stuff")
84
 
85
  def main():
86
-
87
- hide_streamlit_style = """
88
- <style>
89
- #MainMenu {visibility: hidden;}
90
- footer {visibility: hidden;}
91
- </style>
92
- """
93
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
94
-
95
-
96
- # Main content
97
- st.title("Welcome to BinDocs ChatBot! 🤖")
98
 
99
- # Directly specifying the path to the PDF file
100
- pdf_path = pdf_file_path
101
- if not os.path.exists(pdf_path):
102
- st.error("File not found. Please check the file path.")
103
- return
104
-
105
- if "chat_history" not in st.session_state:
106
- st.session_state['chat_history'] = []
107
-
108
- display_chat_history(st.session_state['chat_history'])
109
-
110
- st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
111
- st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
112
- st.write("<!-- End Spacer -->", unsafe_allow_html=True)
113
-
114
- new_messages_placeholder = st.empty()
115
-
116
- if pdf_path is not None:
117
- query = st.text_input("Ask questions about your PDF file (in any preferred language):")
118
-
119
- if st.button("Was genau ist ein Belegarzt?"):
120
- query = "Was genau ist ein Belegarzt?"
121
- if st.button("Wofür wird die Alpha-ID verwendet?"):
122
- query = "Wofür wird die Alpha-ID verwendet?"
123
- if st.button("Was sind die Vorteile des ambulanten operierens?"):
124
- query = "Was sind die Vorteile des ambulanten operierens?"
125
- if st.button("Was kann ich mit dem Prognose-Analyse Toll machen?"):
126
- query = "Was kann ich mit dem Prognose-Analyse Toll machen?"
127
- if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
128
- query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
129
- if st.button("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?"):
130
- query = ("Ich habe mein Meta Password vergessen, wie kann ich es zurücksetzen?")
131
-
132
 
133
- if st.button("Ask") or (not st.session_state['chat_history'] and query) or (st.session_state['chat_history'] and query != st.session_state['chat_history'][-1][1]):
134
- st.session_state['chat_history'].append(("User", query, "new"))
135
-
136
- loading_message = st.empty()
137
- loading_message.text('Bot is thinking...')
138
-
139
- # Start timing
140
- start_time = time.time()
141
-
142
- VectorStore = load_pdf(pdf_path)
143
- chain = load_chatbot()
144
- docs = VectorStore.similarity_search(query=query, k=3)
145
- with get_openai_callback() as cb:
146
- response = chain.run(input_documents=docs, question=query)
147
-
148
- # Stop timing
149
- end_time = time.time()
150
-
151
- # Calculate duration
152
- duration = end_time - start_time
153
-
154
- # You can use Streamlit's text function to display the timing
155
- st.text(f"Response time: {duration:.2f} seconds")
156
-
157
-
158
- st.session_state['chat_history'].append(("Bot", response, "new"))
159
-
160
- # Display new messages at the bottom
161
- new_messages = st.session_state['chat_history'][-2:]
162
- for chat in new_messages:
163
- background_color = "#FFA07A" if chat[2] == "new" else "#acf" if chat[0] == "User" else "#caf"
164
- new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
165
-
166
- # Scroll to the latest response using JavaScript
167
- st.write("<script>document.getElementById('response').scrollIntoView();</script>", unsafe_allow_html=True)
168
-
169
- loading_message.empty()
170
-
171
- # Clear the input field by setting the query variable to an empty string
172
- query = ""
173
-
174
- # Mark all messages as old after displaying
175
- st.session_state['chat_history'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']]
 
 
 
 
 
 
 
 
176
 
 
 
 
177
 
178
 
179
  def display_chat_history(chat_history):
180
  for chat in chat_history:
181
- background_color = "#FFA07A" if chat[2] == "new" else "#acf" if chat[0] == "User" else "#caf"
182
  st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
183
 
 
184
  if __name__ == "__main__":
185
  main()
 
1
  import streamlit as st
2
  import time
3
+ import streamlit_analytics
4
  from dotenv import load_dotenv
5
  import pickle
6
  from huggingface_hub import Repository
 
18
  repo = Repository(
19
  local_dir="Private_Book", # Local directory to clone the repository
20
  repo_type="dataset", # Specify that this is a dataset repository
 
21
  clone_from="Anne31415/Private_Book", # Replace with your repository URL
 
22
  token=os.environ["HUB_TOKEN"] # Use the secret token to authenticate
23
  )
24
  repo.git_pull() # Pull the latest changes (if any)
25
 
26
  # Step 2: Load the PDF File
27
+ pdf_path = "Private_Book/KOMBI_all2.pdf" # Replace with your PDF file path
28
 
29
  with st.sidebar:
30
  st.title('BinDoc GmbH')
 
49
  api_key = os.getenv("OPENAI_API_KEY")
50
  # The API key above is read from the OPENAI_API_KEY environment variable, not from st.secrets
51
 
52
# Cache the vector store as a shared resource: st.cache_resource hands back the
# same in-memory object on every rerun, whereas st.cache_data would serialise
# the whole index and return a fresh copy each time. Persistence across
# restarts is already provided by the "{store_name}.pkl" file written below.
@st.cache_resource
def load_vector_store(file_path, store_name, force_reload=False):
    """Return the FAISS vector store for a PDF, building it when necessary.

    The store is persisted as ``{store_name}.pkl`` in the current working
    directory. If that file exists and ``force_reload`` is false it is
    unpickled and returned; otherwise the PDF text is extracted, split into
    overlapping chunks, embedded with ``OpenAIEmbeddings`` and indexed with
    ``FAISS.from_texts``, and the result is pickled for later runs.

    Args:
        file_path: Path of the PDF passed to ``load_pdf_text``.
        store_name: Base name (without ``.pkl``) of the pickle file.
        force_reload: When true, rebuild the store even if a pickle exists.

    Returns:
        The vector store object, either unpickled or freshly built.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.
    """
    store_file = f"{store_name}.pkl"

    if not force_reload and os.path.exists(store_file):
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. This is only acceptable while the .pkl is produced by this
        # application itself and is not writable by untrusted parties.
        with open(store_file, "rb") as f:
            return pickle.load(f)

    text = load_pdf_text(file_path)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Fail with a clear message instead of an obscure error from the
        # embedding/indexing step when the PDF yields no extractable text.
        raise ValueError(f"No text could be extracted from {file_path!r}")

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)

    # Write to a temporary file and rename it into place so an interrupted
    # dump never leaves a truncated pickle that later runs would try to load.
    tmp_file = f"{store_file}.tmp"
    with open(tmp_file, "wb") as f:
        pickle.dump(vector_store, f)
    os.replace(tmp_file, store_file)

    return vector_store
77
 
78
# Utility function to load text from a PDF
def load_pdf_text(file_path):
    """Return the concatenated text of all pages of the PDF at ``file_path``.

    Pages for which ``extract_text()`` returns a falsy value (``None`` or an
    empty string) contribute nothing to the result.

    Args:
        file_path: Path handed to ``PdfReader``.

    Returns:
        A single string with the page texts joined in page order, without
        any separator between pages.
    """
    pdf_reader = PdfReader(file_path)
    # Join once instead of growing a string with += for every page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
85
 
86
def load_chatbot():
    """Build the question-answering chain used to answer user queries.

    Returns:
        The object returned by ``load_qa_chain`` for a default-constructed
        ``OpenAI`` LLM and the ``"stuff"`` chain type.
    """
    llm = OpenAI()
    qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
    return qa_chain
88
 
89
# Example questions offered as one-click buttons below the text input.
_SUGGESTED_QUESTIONS = (
    "Was genau ist ein Belegarzt?",
    "Wofür wird die Alpha-ID verwendet?",
    "Was sind die Vorteile des ambulanten Operierens?",
    "Was kann ich mit dem Prognose-Analyse-Tool machen?",
    "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?",
    "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?",
)


def _chat_bubble_html(sender, message):
    """Return the HTML bubble for one chat message, with both parts escaped."""
    import html

    # The text comes from the user and from the language model, so it must be
    # escaped before being rendered with unsafe_allow_html=True.
    return (
        "<div style='background-color: #ffeecf; padding: 10px; "
        "border-radius: 10px; margin: 10px;'>"
        f"{html.escape(str(sender))}: {html.escape(str(message))}</div>"
    )


def main():
    """Render the chatbot page and answer the current query, if any.

    Per Streamlit run this hides the default menu/footer, loads the vector
    store for ``pdf_path``, shows the stored chat history, reads a query from
    the text input or one of the suggested-question buttons, runs a
    similarity search plus the QA chain for it, and displays the new
    question/answer pair together with the response time.

    Any exception raised along the way is logged and shown to the user via
    ``st.error`` instead of propagating.
    """
    import logging

    try:
        # Kept flush-left so the Markdown renderer cannot mistake indented
        # lines for a code block.
        hide_streamlit_style = (
            "\n<style>\n"
            "#MainMenu {visibility: hidden;}\n"
            "footer {visibility: hidden;}\n"
            "</style>\n"
        )
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)

        # Main content
        st.title("Welcome to BinDocs ChatBot! 🤖")

        # Start tracking user interactions
        with streamlit_analytics.track():
            if not os.path.exists(pdf_path):
                st.error("File not found. Please check the file path.")
                return

            vector_store = load_vector_store(pdf_path, "my_vector_store", force_reload=False)

            if "chat_history" not in st.session_state:
                st.session_state['chat_history'] = []

            display_chat_history(st.session_state['chat_history'])

            st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
            st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
            st.write("<!-- End Spacer -->", unsafe_allow_html=True)

            # Reserved slot (above the input widgets) for the newest exchange.
            new_messages_placeholder = st.empty()

            query = st.text_input("Ask questions about your PDF file (in any preferred language):")

            # A clicked suggestion button overrides whatever is in the text box.
            for suggestion in _SUGGESTED_QUESTIONS:
                if st.button(suggestion):
                    query = suggestion

            if query:
                st.session_state['chat_history'].append(("User", query, "new"))

                # Start timing
                start_time = time.time()

                with st.spinner('Bot is thinking...'):
                    chain = load_chatbot()
                    docs = vector_store.similarity_search(query=query, k=3)
                    with get_openai_callback():
                        response = chain.run(input_documents=docs, question=query)

                # Stop timing and report the duration
                duration = time.time() - start_time
                st.text(f"Response time: {duration:.2f} seconds")

                st.session_state['chat_history'].append(("Bot", response, "new"))

                # st.empty() holds a single element, so writing both messages
                # straight into it would leave only the last one visible.
                # A container inside the placeholder shows question and answer.
                with new_messages_placeholder.container():
                    for sender, message, _ in st.session_state['chat_history'][-2:]:
                        st.markdown(_chat_bubble_html(sender, message), unsafe_allow_html=True)

                # Mark all messages as old after displaying
                st.session_state['chat_history'] = [
                    (sender, msg, "old") for sender, msg, _ in st.session_state['chat_history']
                ]

    except Exception as e:
        # Top-level boundary of the page: keep the app alive, but record the
        # traceback so the failure can be diagnosed.
        logging.getLogger(__name__).exception("Unhandled error while rendering the chatbot page")
        st.error(f"Upsi, an unexpected error occurred: {e}")
179
 
180
 
181
def display_chat_history(chat_history):
    """Render each stored chat message as a coloured HTML bubble.

    Args:
        chat_history: Iterable of message records whose first two items are
            the sender label and the message text. Any further items (such
            as the "new"/"old" marker) do not influence rendering, because
            every message uses the same background colour.
    """
    import html

    for chat in chat_history:
        # Sender and text originate from the user and the language model, so
        # they are escaped before being rendered with unsafe_allow_html=True.
        sender = html.escape(str(chat[0]))
        message = html.escape(str(chat[1]))
        st.markdown(
            "<div style='background-color: #ffeecf; padding: 10px; "
            "border-radius: 10px; margin: 10px;'>"
            f"{sender}: {message}</div>",
            unsafe_allow_html=True,
        )
185
 
186
+
187
  if __name__ == "__main__":
188
  main()