mikepastor11 committed on
Commit
1b5f1f5
1 Parent(s): b758977

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -75
app.py CHANGED
@@ -9,11 +9,8 @@
9
 
10
  import streamlit as st
11
  from streamlit.components.v1 import html
12
-
13
  # from dotenv import load_dotenv
14
-
15
  from PyPDF2 import PdfReader
16
-
17
  from PIL import Image
18
 
19
  # Local file
@@ -37,6 +34,7 @@ DISPLAY_DIALOG_LINES = 6
37
 
38
  SESSION_STARTED = False
39
 
 
40
 
41
  ##################################################################################
42
  def extract_pdf_text(pdf_docs):
@@ -109,7 +107,7 @@ def prepare_conversation(vectorstore):
109
 
110
  ##################################################################################
111
  def process_user_question(user_question):
112
- print('process_user_question called: \n')
113
 
114
  # if not SESSION_STARTED:
115
  # print('No Session')
@@ -150,47 +148,73 @@ def process_user_question(user_question):
150
  # st.error("Please upload and analyze your PDF files first!")
151
  # return
152
 
153
- if st.session_state.conversation == None:
154
- st.error("Please upload and analyze your PDF files first!")
155
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- response = st.session_state.conversation({'question': user_question})
158
- st.session_state.chat_history = response['chat_history']
159
- results_size = len(response['chat_history'])
160
 
161
- results_string = ""
 
162
 
163
- print('results_size is: ', results_size)
 
164
 
165
- for i, message in enumerate(st.session_state.chat_history):
 
 
166
 
167
- # Scrolling does not display the last printed line,
168
- # so only print the last 6 lines
169
- #
170
- print('results_size on msg: ', results_size, i, (results_size - DISPLAY_DIALOG_LINES))
171
- if results_size > DISPLAY_DIALOG_LINES:
172
- if i < (results_size - DISPLAY_DIALOG_LINES):
173
- continue
174
 
175
- if i % 2 == 0:
176
- # st.write(user_template.replace(
177
- # "{{MSG}}", message.content), unsafe_allow_html=True)
178
 
179
- results_string += ("<p>" + message.content + "</p>")
 
 
180
 
181
- else:
182
- # st.write(bot_template.replace(
183
- # "{{MSG}}", message.content), unsafe_allow_html=True)
184
 
185
- results_string += ("<p>" + "-- " + message.content + "</p>")
186
 
187
- html(results_string, height=300, scrolling=True)
188
 
189
 
190
  ###################################################################################
191
  def main():
192
-
193
-
194
  print('Pennwick Starting up...\n')
195
  # Load the environment variables - if any
196
  # load_dotenv()
@@ -214,7 +238,7 @@ def main():
214
  # # Set page config with base64 string
215
  # st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
216
 
217
- st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./robot_icon.ico")
218
 
219
  print('prepared page...\n')
220
 
@@ -231,11 +255,11 @@ def main():
231
  # st.header("Pennwick File Analyzer 2")
232
 
233
  # st.image("robot_icon.png", width=96)
234
- st.image("HoneybeeLogo.png", width=96)
235
  st.header(f"Pennwick Honeybee Robot")
236
 
237
  user_question = None
238
- user_question = st.text_input("Ask the Open Source - Flan-T5 Model any question about Honeybees...")
239
  if user_question != None:
240
  print('calling process question', user_question)
241
  process_user_question(user_question)
@@ -243,47 +267,47 @@ def main():
243
  # st.write( user_template, unsafe_allow_html=True)
244
  # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
245
  # st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True)
246
-
247
- with st.sidebar:
248
-
249
- st.subheader("Which documents would you like to analyze?")
250
- st.subheader("(no data is saved beyond the session)")
251
-
252
- pdf_docs = st.file_uploader(
253
- "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
254
-
255
- # Upon button press
256
- if st.button("Analyze these files"):
257
- with st.spinner("Processing..."):
258
- #################################################################
259
- # Track the overall time for file processing into Vectors
260
- # #
261
- from datetime import datetime
262
- global_now = datetime.now()
263
- global_current_time = global_now.strftime("%H:%M:%S")
264
- st.write("Vectorizing Files - Current Time =", global_current_time)
265
-
266
- # get pdf text
267
- raw_text = extract_pdf_text(pdf_docs)
268
- # st.write(raw_text)
269
-
270
- # # get the text chunks
271
- text_chunks = extract_bitesize_pieces(raw_text)
272
- # st.write(text_chunks)
273
-
274
- # # create vector store
275
- vectorstore = prepare_embedding_vectors(text_chunks)
276
-
277
- # # create conversation chain
278
- st.session_state.conversation = prepare_conversation(vectorstore)
279
-
280
- SESSION_STARTED = True
281
-
282
- # Mission Complete!
283
- global_later = datetime.now()
284
- st.write("Files Vectorized - Total EXECUTION Time =",
285
- (global_later - global_now), global_later)
286
-
287
 
288
  if __name__ == '__main__':
289
  main()
 
9
 
10
  import streamlit as st
11
  from streamlit.components.v1 import html
 
12
  # from dotenv import load_dotenv
 
13
  from PyPDF2 import PdfReader
 
14
  from PIL import Image
15
 
16
  # Local file
 
34
 
35
  SESSION_STARTED = False
36
 
37
+ MODEL_NAME="deepset/roberta-base-squad2"
38
 
39
  ##################################################################################
40
  def extract_pdf_text(pdf_docs):
 
107
 
108
  ##################################################################################
109
def process_user_question(user_question):
    """Answer a Honeybee question using a Hugging Face question-answering pipeline.

    Args:
        user_question: The question text entered by the user. An empty or
            None value is ignored.

    Side effects:
        Writes the answer to the Streamlit page and an embedded HTML panel,
        and prints the answer/score to stdout for debugging.
    """
    st.write(('process_user_question called: ' + user_question), unsafe_allow_html=True)

    # Nothing to do for an empty question (st.text_input returns "" initially).
    if not user_question:
        return

    # Imported lazily so the (heavy) transformers package is only loaded
    # when a question is actually asked.
    from transformers import pipeline

    # BUG FIX: the model must be selected when the pipeline is CONSTRUCTED.
    # The previous code built a default pipeline and then passed
    # model=... to the call, which does not switch models — MODEL_NAME
    # was silently ignored.
    #
    # Cache the pipeline in session state so the model is loaded once per
    # session instead of on every question.
    if 'qa_pipeline' not in st.session_state:
        st.session_state.qa_pipeline = pipeline('question-answering', model=MODEL_NAME)
    nlp = st.session_state.qa_pipeline

    # Fixed context for extractive QA.
    # NOTE(review): an extractive QA model can only pull answers out of this
    # context string, so answers will be limited — presumably a placeholder
    # until PDF text is wired back in; confirm intent.
    context = "You are an expert Apiarist and answer all questions regarding Honeybees."

    # Ask the question.
    answer = nlp(question=user_question, context=context)

    # Debug trace to stdout.
    print(f"Answer: {answer['answer']}")
    print(f"Score: {answer['score']}")

    st.write(('Answer= ' + answer['answer']), unsafe_allow_html=True)

    results_string = answer['answer'] + ' - Probability= ' + str(answer['score'])

    html(results_string, height=100, scrolling=True)
214
 
215
 
216
  ###################################################################################
217
  def main():
 
 
218
  print('Pennwick Starting up...\n')
219
  # Load the environment variables - if any
220
  # load_dotenv()
 
238
  # # Set page config with base64 string
239
  # st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
240
 
241
+ st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./HoneybeeLogo.ico")
242
 
243
  print('prepared page...\n')
244
 
 
255
  # st.header("Pennwick File Analyzer 2")
256
 
257
  # st.image("robot_icon.png", width=96)
258
+ st.image("./HoneybeeLogo.png", width=96)
259
  st.header(f"Pennwick Honeybee Robot")
260
 
261
  user_question = None
262
+ user_question = st.text_input("Ask the Open Source - "+MODEL_NAME+" - Model any question about Honeybees...")
263
  if user_question != None:
264
  print('calling process question', user_question)
265
  process_user_question(user_question)
 
267
  # st.write( user_template, unsafe_allow_html=True)
268
  # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
269
  # st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True)
270
+ #
271
+ # with st.sidebar:
272
+ #
273
+ # st.subheader("Which documents would you like to analyze?")
274
+ # st.subheader("(no data is saved beyond the session)")
275
+ #
276
+ # pdf_docs = st.file_uploader(
277
+ # "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
278
+ #
279
+ # # Upon button press
280
+ # if st.button("Analyze these files"):
281
+ # with st.spinner("Processing..."):
282
+ # #################################################################
283
+ # # Track the overall time for file processing into Vectors
284
+ # # #
285
+ # from datetime import datetime
286
+ # global_now = datetime.now()
287
+ # global_current_time = global_now.strftime("%H:%M:%S")
288
+ # st.write("Vectorizing Files - Current Time =", global_current_time)
289
+ #
290
+ # # get pdf text
291
+ # raw_text = extract_pdf_text(pdf_docs)
292
+ # # st.write(raw_text)
293
+ #
294
+ # # # get the text chunks
295
+ # text_chunks = extract_bitesize_pieces(raw_text)
296
+ # # st.write(text_chunks)
297
+ #
298
+ # # # create vector store
299
+ # vectorstore = prepare_embedding_vectors(text_chunks)
300
+ #
301
+ # # # create conversation chain
302
+ # st.session_state.conversation = prepare_conversation(vectorstore)
303
+ #
304
+ # SESSION_STARTED = True
305
+ #
306
+ # # Mission Complete!
307
+ # global_later = datetime.now()
308
+ # st.write("Files Vectorized - Total EXECUTION Time =",
309
+ # (global_later - global_now), global_later)
310
+ #
311
 
312
  if __name__ == '__main__':
313
  main()