Anne31415 committed on
Commit c368a2c
1 Parent(s): d709f6f

Update app.py

Files changed (1)
  1. app.py +63 -377
app.py CHANGED
@@ -1,383 +1,69 @@
- import streamlit as st
- from PIL import Image
- import random
- import time
- import streamlit_analytics
  from dotenv import load_dotenv
- import pickle
- from huggingface_hub import Repository
- from PyPDF2 import PdfReader
- from streamlit_extras.add_vertical_space import add_vertical_space
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.llms import OpenAI
- from langchain.chains.question_answering import load_qa_chain
- from langchain.callbacks import get_openai_callback
  import os
-
- import pandas as pd
- import pydeck as pdk
- from urllib.error import URLError
-
- # Initialize session state variables
- if 'chat_history_page1' not in st.session_state:
-     st.session_state['chat_history_page1'] = []
-
- if 'chat_history_page2' not in st.session_state:
-     st.session_state['chat_history_page2'] = []
-
-
- # Step 1: Clone the dataset repository
- repo = Repository(
-     local_dir="Private_Book",  # Local directory to clone the repository into
-     repo_type="dataset",  # Specify that this is a dataset repository
-     clone_from="Anne31415/Private_Book",  # Replace with your repository URL
-     token=os.environ["HUB_TOKEN"]  # Use the secret token to authenticate
- )
- repo.git_pull()  # Pull the latest changes (if any)
-
- # Step 2: Load the PDF files
- pdf_path = "Private_Book/18122023_KOMBI.pdf"  # Replace with your PDF file path
- pdf_path2 = "Private_Book/Deutsche_Kodierrichtlinien_23.pdf"  # Replace with your PDF file path
-
- # Retrieve the API key from the environment
- api_key = os.getenv("OPENAI_API_KEY")
-
-
- # Caching mechanism using st.cache_data
- @st.cache_data(persist="disk")  # persist="disk" saves the cache across sessions
- def load_vector_store(file_path, store_name, force_reload=False):
-     # Rebuild the vector store if forced (e.g., when the PDF changes) or if no cache exists
-     if force_reload or not os.path.exists(f"{store_name}.pkl"):
-         text_splitter = RecursiveCharacterTextSplitter(
-             chunk_size=1000,
-             chunk_overlap=200,
-             length_function=len
-         )
-
-         text = load_pdf_text(file_path)
-         chunks = text_splitter.split_text(text=text)
-
-         embeddings = OpenAIEmbeddings()
-         VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
-         VectorStore.save_local("faiss_store")
-         FAISS.load_local("faiss_store", OpenAIEmbeddings())
-         with open(f"{store_name}.pkl", "wb") as f:
-             pickle.dump(VectorStore, f)
-     else:
-         with open(f"{store_name}.pkl", "rb") as f:
-             VectorStore = pickle.load(f)
-
-     return VectorStore
-
- # Utility function to load text from a PDF
- def load_pdf_text(file_path):
-     pdf_reader = PdfReader(file_path)
-     text = ""
-     for page in pdf_reader.pages:
-         text += page.extract_text() or ""  # Fallback for pages where text extraction fails
-     return text
-
- def load_chatbot():
-     # return load_qa_chain(llm=OpenAI(), chain_type="stuff")
-     return load_qa_chain(llm=OpenAI(model_name="gpt-3.5-turbo-instruct"), chain_type="stuff")
-
-
- def display_chat_history(chat_history):
-     for chat in chat_history:
-         background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
-         st.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-
- def handle_no_answer(response):
-     no_answer_phrases = [
-         "ich weiß es nicht",
-         "ich weiß nicht",
-         "ich bin mir nicht sicher",
-         "es wird nicht erwähnt",
-         "Leider kann ich diese Frage nicht beantworten",
-         "kann ich diese Frage nicht beantworten",
-         "ich kann diese Frage nicht beantworten",
-         "ich kann diese Frage leider nicht beantworten",
-         "keine information",
-         "das ist unklar",
-         "da habe ich keine antwort",
-         "das kann ich nicht beantworten",
-         "i don't know",
-         "i am not sure",
-         "it is not mentioned",
-         "no information",
-         "that is unclear",
-         "i have no answer",
-         "i cannot answer that",
-         "unable to provide an answer",
-         "not enough context",
-     ]
-
-     alternative_responses = [
-         "Hmm, das ist eine knifflige Frage. Lass uns das gemeinsam erkunden. Kannst du mehr Details geben?",
-         "Interessante Frage! Ich bin mir nicht sicher, aber wir können es herausfinden. Hast du weitere Informationen?",
-         "Das ist eine gute Frage. Ich habe momentan keine Antwort darauf, aber vielleicht kannst du sie anders formulieren?",
-         "Da bin ich überfragt. Kannst du die Frage anders stellen oder mir mehr Kontext geben?",
-         "Ich stehe hier etwas auf dem Schlauch. Gibt es noch andere Aspekte der Frage, die wir betrachten könnten?",
-         # Add more alternative responses as needed
-     ]
-
-     # Check if the response matches any phrase in no_answer_phrases
-     if any(phrase in response.lower() for phrase in no_answer_phrases):
-         return random.choice(alternative_responses)  # Randomly select a fallback response
-     return response
-
-
- def page1():
-     try:
-         hide_streamlit_style = """
-             <style>
-             #MainMenu {visibility: hidden;}
-             footer {visibility: hidden;}
-             </style>
-         """
-         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-
-         # Create columns for layout
-         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
-
-         with col1:
-             st.title("Welcome to BinDocs AI!")
-
-         with col2:
-             # Display the logo in the top-right corner of the page
-             image = Image.open('BinDoc Logo (Quadratisch).png')
-             st.image(image, use_column_width='always')
-
-         # Start tracking user interactions
-         with streamlit_analytics.track():
-             if not os.path.exists(pdf_path):
-                 st.error("File not found. Please check the file path.")
-                 return
-
-             VectorStore = load_vector_store(pdf_path, "vector_store_page1", force_reload=False)
-
-             display_chat_history(st.session_state['chat_history_page1'])
-
-             st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
-             st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
-             st.write("<!-- End Spacer -->", unsafe_allow_html=True)
-
-             new_messages_placeholder = st.empty()
-
-             query = st.text_input("Geben Sie hier Ihre Frage ein / Enter your question here:")
-
-             add_vertical_space(2)  # Adjust as per the desired spacing
-
-             # Create two columns for the buttons
-             col1, col2 = st.columns(2)
-
-             with col1:
-                 if st.button("Was kann ich mit dem Prognose-Analyse-Tool machen?"):
-                     query = "Was kann ich mit dem Prognose-Analyse-Tool machen?"
-                 if st.button("Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"):
-                     query = "Was sagt mir die Farbe der Balken der Bevölkerungsentwicklung?"
-                 if st.button("Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"):
-                     query = "Ich habe mein Meta-Password vergessen, wie kann ich es zurücksetzen?"
-
-             with col2:
-                 if st.button("Dies ist eine reine Test Frage, welche aber eine ausreichende Länge hat."):
-                     query = "Dies ist eine reine Test Frage, welche aber eine ausreichende Länge hat."
-                 if st.button("Was sagt mir denn generell die wundervolle Bevölkerungsentwicklung?"):
-                     query = "Was sagt mir denn generell die wundervolle Bevölkerungsentwicklung?"
-                 if st.button("Ob ich hier wohl viel schreibe, dass die Fragen vom Layout her passen?"):
-                     query = "Ob ich hier wohl viel schreibe, dass die Fragen vom Layout her passen?"

-             if query:
-                 st.session_state['chat_history_page1'].append(("User", query, "new"))
-
-                 # Start timing
-                 start_time = time.time()
-
-                 with st.spinner('Bot is thinking...'):
-                     chain = load_chatbot()
-                     docs = VectorStore.similarity_search(query=query, k=3)
-                     with get_openai_callback() as cb:  # cb tracks OpenAI token usage
-                         response = chain.run(input_documents=docs, question=query)
-                         response = handle_no_answer(response)  # Substitute a fallback if the bot has no answer

-                 # Stop timing and display the duration
-                 end_time = time.time()
-                 duration = end_time - start_time
-                 st.text(f"Response time: {duration:.2f} seconds")
-
-                 st.session_state['chat_history_page1'].append(("Bot", response, "new"))
-
-                 # Display new messages at the bottom
-                 new_messages = st.session_state['chat_history_page1'][-2:]
-                 for chat in new_messages:
-                     background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
-                     new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-                 # Clear the input field after the query is made
-                 query = ""
-
-                 # Mark all messages as old after displaying
-                 st.session_state['chat_history_page1'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page1']]
-
-     except Exception as e:
-         st.error(f"Upsi, an unexpected error occurred: {e}")
-         # Optionally log the exception details to a file or error-tracking service
-
-
- def page2():
-     try:
-         hide_streamlit_style = """
-             <style>
-             #MainMenu {visibility: hidden;}
-             footer {visibility: hidden;}
-             </style>
-         """
-         st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-
-         # Create columns for layout
-         col1, col2 = st.columns([3, 1])  # Adjust the ratio to your liking
-
-         with col1:
-             st.title("Kodieren statt Frustrieren!")
-
-         with col2:
-             # Display the logo in the top-right corner of the page
-             image = Image.open('BinDoc Logo (Quadratisch).png')
-             st.image(image, use_column_width='always')
-
-         # Start tracking user interactions
-         with streamlit_analytics.track():
-             if not os.path.exists(pdf_path2):
-                 st.error("File not found. Please check the file path.")
-                 return
-
-             VectorStore = load_vector_store(pdf_path2, "vector_store_page2", force_reload=False)
-
-             display_chat_history(st.session_state['chat_history_page2'])
-
-             st.write("<!-- Start Spacer -->", unsafe_allow_html=True)
-             st.write("<div style='flex: 1;'></div>", unsafe_allow_html=True)
-             st.write("<!-- End Spacer -->", unsafe_allow_html=True)
-
-             new_messages_placeholder = st.empty()
-
-             query = st.text_input("Ask questions about your PDF file (in any preferred language):")
-
-             add_vertical_space(2)  # Adjust as per the desired spacing
-
-             # Create two columns for the buttons
-             col1, col2 = st.columns(2)
-
-             with col1:
-                 if st.button("Wann kodiere ich etwas als Hauptdiagnose und wann als Nebendiagnose?"):
-                     query = "Wann kodiere ich etwas als Hauptdiagnose und wann als Nebendiagnose?"
-                 if st.button("Ein Patient wird mit Aszites bei bekannter Leberzirrhose stationär aufgenommen. Es wird nur der Aszites durch eine Punktion behandelt. Wie kodiere ich das?"):
-                     query = "Ein Patient wird mit Aszites bei bekannter Leberzirrhose stationär aufgenommen. Es wird nur der Aszites durch eine Punktion behandelt. Wie kodiere ich das?"
-                 if st.button("Hauptdiagnose: Hirntumor, wie kodiere ich das?"):
-                     query = "Hauptdiagnose: Hirntumor, wie kodiere ich das?"
-
-             with col2:
-                 if st.button("Welche Prozeduren werden normalerweise nicht verschlüsselt?"):
-                     query = "Welche Prozeduren werden normalerweise nicht verschlüsselt?"
-                 if st.button("Was muss ich bei der Kodierung der Folgezustände von Krankheiten beachten?"):
-                     query = "Was muss ich bei der Kodierung der Folgezustände von Krankheiten beachten?"
-                 if st.button("Was mache ich bei einer Verdachtsdiagnose, wenn mein Patient nach Hause entlassen wird?"):
-                     query = "Was mache ich bei einer Verdachtsdiagnose, wenn mein Patient nach Hause entlassen wird?"

-             if query:
-                 st.session_state['chat_history_page2'].append(("User", query, "new"))
-
-                 # Start timing
-                 start_time = time.time()
-
-                 with st.spinner('Bot is thinking...'):
-                     chain = load_chatbot()
-                     docs = VectorStore.similarity_search(query=query, k=3)
-                     with get_openai_callback() as cb:  # cb tracks OpenAI token usage
-                         response = chain.run(input_documents=docs, question=query)
-                         response = handle_no_answer(response)  # Substitute a fallback if the bot has no answer
-
-                 # Stop timing and display the duration
-                 end_time = time.time()
-                 duration = end_time - start_time
-                 st.text(f"Response time: {duration:.2f} seconds")
-
-                 st.session_state['chat_history_page2'].append(("Bot", response, "new"))
-
-                 # Display new messages at the bottom
-                 new_messages = st.session_state['chat_history_page2'][-2:]
-                 for chat in new_messages:
-                     background_color = "#ffeecf" if chat[2] == "new" else "#ffeecf" if chat[0] == "User" else "#ffeecf"
-                     new_messages_placeholder.markdown(f"<div style='background-color: {background_color}; padding: 10px; border-radius: 10px; margin: 10px;'>{chat[0]}: {chat[1]}</div>", unsafe_allow_html=True)
-
-                 # Clear the input field after the query is made
-                 query = ""
-
-                 # Mark all messages as old after displaying
-                 st.session_state['chat_history_page2'] = [(sender, msg, "old") for sender, msg, _ in st.session_state['chat_history_page2']]
-
-     except Exception as e:
-         st.error(f"Upsi, an unexpected error occurred: {e}")
-         # Optionally log the exception details to a file or error-tracking service
-
-
- def main():
-     # Sidebar content
-     with st.sidebar:
-         st.title('BinDoc GmbH')
-         st.markdown("Experience revolutionary interaction with BinDocs Chat App, leveraging state-of-the-art AI technology.")
-         add_vertical_space(1)
-         page = st.sidebar.selectbox("Choose a page", ["Document Analysis Bot", "Coding Assistance Bot"])
-         add_vertical_space(1)
-         st.write('Made with ❤️ by BinDoc GmbH')
-
-     # Main area content based on page selection
-     if page == "Document Analysis Bot":
-         page1()
-     elif page == "Coding Assistance Bot":
-         page2()
-
-
- if __name__ == "__main__":
-     main()
+ import streamlit as st
+ from lida import Manager, TextGenerationConfig, llm
  from dotenv import load_dotenv
  import os
+ import openai
+ from PIL import Image
+ from io import BytesIO
+ import base64
+
+ load_dotenv()
+ openai.api_key = os.getenv('OPENAI_API_KEY')
+
+ def base64_to_image(base64_string):
+     # Decode the base64 string to raw bytes
+     byte_data = base64.b64decode(base64_string)
+
+     # Use BytesIO to turn the byte data into a PIL image
+     return Image.open(BytesIO(byte_data))
+
+
+ lida = Manager(text_gen=llm("openai"))
+ textgen_config = TextGenerationConfig(n=1, temperature=0.5, model="gpt-3.5-turbo-0301", use_cache=True)
+
+ menu = st.sidebar.selectbox("Choose an Option", ["Summarize", "Question based Graph"])
+
+ if menu == "Summarize":
+     st.subheader("Summarization of your Data")
+     file_uploader = st.file_uploader("Upload your CSV", type="csv")
+     if file_uploader is not None:
+         path_to_save = "filename.csv"
+         with open(path_to_save, "wb") as f:
+             f.write(file_uploader.getvalue())
+         # Summarize the uploaded CSV, then derive visualization goals from the summary
+         summary = lida.summarize("filename.csv", summary_method="default", textgen_config=textgen_config)
+         st.write(summary)
+         goals = lida.goals(summary, n=2, textgen_config=textgen_config)
+         for goal in goals:
+             st.write(goal)
+         i = 0  # Visualize the first goal
+         library = "seaborn"
+         textgen_config = TextGenerationConfig(n=1, temperature=0.2, use_cache=True)
+         charts = lida.visualize(summary=summary, goal=goals[i], textgen_config=textgen_config, library=library)
+         # charts[0].raster holds the rendered chart as a base64-encoded image
+         img_base64_string = charts[0].raster
+         img = base64_to_image(img_base64_string)
+         st.image(img)
+
+ elif menu == "Question based Graph":
+     st.subheader("Query your Data to Generate Graph")
+     file_uploader = st.file_uploader("Upload your CSV", type="csv")
+     if file_uploader is not None:
+         path_to_save = "filename1.csv"
+         with open(path_to_save, "wb") as f:
+             f.write(file_uploader.getvalue())
+         text_area = st.text_area("Query your Data to Generate Graph", height=200)
+         if st.button("Generate Graph"):
+             if len(text_area) > 0:
+                 st.info("Your Query: " + text_area)
+                 lida = Manager(text_gen=llm("openai"))
+                 textgen_config = TextGenerationConfig(n=1, temperature=0.2, use_cache=True)
+                 summary = lida.summarize("filename1.csv", summary_method="default", textgen_config=textgen_config)
+                 user_query = text_area
+                 charts = lida.visualize(summary=summary, goal=user_query, textgen_config=textgen_config)
+                 charts[0]  # Bare expression: Streamlit "magic" writes the chart object to the app
+                 image_base64 = charts[0].raster
+                 img = base64_to_image(image_base64)
+                 st.image(img)
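
For reference, the LIDA flow this commit introduces can be smoke-tested outside Streamlit. The sketch below is a minimal, hypothetical example: it reuses only calls that appear in the new app.py (Manager, llm("openai"), TextGenerationConfig, summarize, visualize, and the base64 raster field), while "data.csv", the goal string, and "chart.png" are illustrative placeholders. It assumes the lida package is installed and OPENAI_API_KEY is set in the environment.

    import base64
    from lida import Manager, TextGenerationConfig, llm

    # Same setup as the app: an OpenAI-backed LIDA manager with response caching.
    lida = Manager(text_gen=llm("openai"))
    config = TextGenerationConfig(n=1, temperature=0.2, use_cache=True)

    # Summarize a local CSV ("data.csv" is a placeholder), then render one chart
    # for a natural-language goal, as the "Question based Graph" page does.
    summary = lida.summarize("data.csv", summary_method="default", textgen_config=config)
    charts = lida.visualize(summary=summary, goal="Show the distribution of each numeric column",
                            textgen_config=config, library="seaborn")

    # As in the app, charts[0].raster is a base64-encoded image; decode and save it.
    with open("chart.png", "wb") as out:
        out.write(base64.b64decode(charts[0].raster))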