Spaces:
Sleeping
Sleeping
mikepastor11
committed on
Commit
•
1b5f1f5
1
Parent(s):
b758977
Update app.py
Browse files
app.py
CHANGED
@@ -9,11 +9,8 @@
|
|
9 |
|
10 |
import streamlit as st
|
11 |
from streamlit.components.v1 import html
|
12 |
-
|
13 |
# from dotenv import load_dotenv
|
14 |
-
|
15 |
from PyPDF2 import PdfReader
|
16 |
-
|
17 |
from PIL import Image
|
18 |
|
19 |
# Local file
|
@@ -37,6 +34,7 @@ DISPLAY_DIALOG_LINES = 6
|
|
37 |
|
38 |
SESSION_STARTED = False
|
39 |
|
|
|
40 |
|
41 |
##################################################################################
|
42 |
def extract_pdf_text(pdf_docs):
|
@@ -109,7 +107,7 @@ def prepare_conversation(vectorstore):
|
|
109 |
|
110 |
##################################################################################
|
111 |
def process_user_question(user_question):
|
112 |
-
|
113 |
|
114 |
# if not SESSION_STARTED:
|
115 |
# print('No Session')
|
@@ -150,47 +148,73 @@ def process_user_question(user_question):
|
|
150 |
# st.error("Please upload and analyze your PDF files first!")
|
151 |
# return
|
152 |
|
153 |
-
if st.session_state.conversation == None:
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
-
|
158 |
-
st.session_state.chat_history = response['chat_history']
|
159 |
-
results_size = len(response['chat_history'])
|
160 |
|
161 |
-
|
|
|
162 |
|
163 |
-
|
|
|
164 |
|
165 |
-
|
|
|
|
|
166 |
|
167 |
-
|
168 |
-
# so only print the last 6 lines
|
169 |
-
#
|
170 |
-
print('results_size on msg: ', results_size, i, (results_size - DISPLAY_DIALOG_LINES))
|
171 |
-
if results_size > DISPLAY_DIALOG_LINES:
|
172 |
-
if i < (results_size - DISPLAY_DIALOG_LINES):
|
173 |
-
continue
|
174 |
|
175 |
-
|
176 |
-
|
177 |
-
# "{{MSG}}", message.content), unsafe_allow_html=True)
|
178 |
|
179 |
-
|
|
|
|
|
180 |
|
181 |
-
|
182 |
-
# st.write(bot_template.replace(
|
183 |
-
# "{{MSG}}", message.content), unsafe_allow_html=True)
|
184 |
|
185 |
-
|
186 |
|
187 |
-
html(results_string, height=
|
188 |
|
189 |
|
190 |
###################################################################################
|
191 |
def main():
|
192 |
-
|
193 |
-
|
194 |
print('Pennwick Starting up...\n')
|
195 |
# Load the environment variables - if any
|
196 |
# load_dotenv()
|
@@ -214,7 +238,7 @@ def main():
|
|
214 |
# # Set page config with base64 string
|
215 |
# st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
|
216 |
|
217 |
-
st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./
|
218 |
|
219 |
print('prepared page...\n')
|
220 |
|
@@ -231,11 +255,11 @@ def main():
|
|
231 |
# st.header("Pennwick File Analyzer 2")
|
232 |
|
233 |
# st.image("robot_icon.png", width=96)
|
234 |
-
st.image("HoneybeeLogo.png", width=96)
|
235 |
st.header(f"Pennwick Honeybee Robot")
|
236 |
|
237 |
user_question = None
|
238 |
-
user_question = st.text_input("Ask the Open Source -
|
239 |
if user_question != None:
|
240 |
print('calling process question', user_question)
|
241 |
process_user_question(user_question)
|
@@ -243,47 +267,47 @@ def main():
|
|
243 |
# st.write( user_template, unsafe_allow_html=True)
|
244 |
# st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
|
245 |
# st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True)
|
246 |
-
|
247 |
-
with st.sidebar:
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
|
288 |
if __name__ == '__main__':
|
289 |
main()
|
|
|
9 |
|
10 |
import streamlit as st
|
11 |
from streamlit.components.v1 import html
|
|
|
12 |
# from dotenv import load_dotenv
|
|
|
13 |
from PyPDF2 import PdfReader
|
|
|
14 |
from PIL import Image
|
15 |
|
16 |
# Local file
|
|
|
34 |
|
35 |
SESSION_STARTED = False
|
36 |
|
37 |
+
MODEL_NAME="deepset/roberta-base-squad2"
|
38 |
|
39 |
##################################################################################
|
40 |
def extract_pdf_text(pdf_docs):
|
|
|
107 |
|
108 |
##################################################################################
|
109 |
def process_user_question(user_question):
|
110 |
+
st.write( ('process_user_question called: '+user_question), unsafe_allow_html=True)
|
111 |
|
112 |
# if not SESSION_STARTED:
|
113 |
# print('No Session')
|
|
|
148 |
# st.error("Please upload and analyze your PDF files first!")
|
149 |
# return
|
150 |
|
151 |
+
# if st.session_state.conversation == None:
|
152 |
+
# st.error("Please upload and analyze your PDF files first!")
|
153 |
+
# return
|
154 |
+
|
155 |
+
#
|
156 |
+
# response = st.session_state.conversation({'question': user_question})
|
157 |
+
# st.session_state.chat_history = response['chat_history']
|
158 |
+
# results_size = len(response['chat_history'])
|
159 |
+
#
|
160 |
+
# results_string = ""
|
161 |
+
#
|
162 |
+
# print('results_size is: ', results_size)
|
163 |
+
#
|
164 |
+
# for i, message in enumerate(st.session_state.chat_history):
|
165 |
+
#
|
166 |
+
# # Scrolling does not display the last printed line,
|
167 |
+
# # so only print the last 6 lines
|
168 |
+
# #
|
169 |
+
# print('results_size on msg: ', results_size, i, (results_size - DISPLAY_DIALOG_LINES))
|
170 |
+
# if results_size > DISPLAY_DIALOG_LINES:
|
171 |
+
# if i < (results_size - DISPLAY_DIALOG_LINES):
|
172 |
+
# continue
|
173 |
+
#
|
174 |
+
# if i % 2 == 0:
|
175 |
+
# # st.write(user_template.replace(
|
176 |
+
# # "{{MSG}}", message.content), unsafe_allow_html=True)
|
177 |
+
#
|
178 |
+
# results_string += ("<p>" + message.content + "</p>")
|
179 |
+
#
|
180 |
+
# else:
|
181 |
+
# # st.write(bot_template.replace(
|
182 |
+
# # "{{MSG}}", message.content), unsafe_allow_html=True)
|
183 |
+
#
|
184 |
+
# results_string += ("<p>" + "-- " + message.content + "</p>")
|
185 |
+
|
186 |
+
st.write('start pipelene', unsafe_allow_html=True)
|
187 |
|
188 |
+
from transformers import pipeline
|
|
|
|
|
189 |
|
190 |
+
# Choose a question answering pipeline (e.g., 'question-answering')
|
191 |
+
nlp = pipeline("question-answering")
|
192 |
|
193 |
+
# Specify the model name or identifier (e.g., 'deepset/roberta-base-squad2')
|
194 |
+
model_name = MODEL_NAME
|
195 |
|
196 |
+
# Prepare the question and context (optional)
|
197 |
+
# question = "What is the capital of France?"
|
198 |
+
# context = "France is a country located in Western Europe. It is bordered by the Atlantic Ocean to the west, the Mediterranean Sea to the south, and Belgium, Luxembourg, Germany, Switzerland, Italy, and Spain to the east and north."
|
199 |
|
200 |
+
context = "You are an expert Apiarist and answer all questions regarding Honeybees."
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
+
# Ask the question
|
203 |
+
answer = nlp(question=user_question, context=context, model=model_name)
|
|
|
204 |
|
205 |
+
# Print the answer
|
206 |
+
print(f"Answer: {answer['answer']}")
|
207 |
+
print(f"Score: {answer['score']}")
|
208 |
|
209 |
+
st.write( ('Answer= '+answer['answer']), unsafe_allow_html=True)
|
|
|
|
|
210 |
|
211 |
+
results_string = answer['answer'] + ' - Probability= ' + str( answer['score'] )
|
212 |
|
213 |
+
html(results_string, height=100, scrolling=True)
|
214 |
|
215 |
|
216 |
###################################################################################
|
217 |
def main():
|
|
|
|
|
218 |
print('Pennwick Starting up...\n')
|
219 |
# Load the environment variables - if any
|
220 |
# load_dotenv()
|
|
|
238 |
# # Set page config with base64 string
|
239 |
# st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
|
240 |
|
241 |
+
st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./HoneybeeLogo.ico")
|
242 |
|
243 |
print('prepared page...\n')
|
244 |
|
|
|
255 |
# st.header("Pennwick File Analyzer 2")
|
256 |
|
257 |
# st.image("robot_icon.png", width=96)
|
258 |
+
st.image("./HoneybeeLogo.png", width=96)
|
259 |
st.header(f"Pennwick Honeybee Robot")
|
260 |
|
261 |
user_question = None
|
262 |
+
user_question = st.text_input("Ask the Open Source - "+MODEL_NAME+" - Model any question about Honeybees...")
|
263 |
if user_question != None:
|
264 |
print('calling process question', user_question)
|
265 |
process_user_question(user_question)
|
|
|
267 |
# st.write( user_template, unsafe_allow_html=True)
|
268 |
# st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
|
269 |
# st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True)
|
270 |
+
#
|
271 |
+
# with st.sidebar:
|
272 |
+
#
|
273 |
+
# st.subheader("Which documents would you like to analyze?")
|
274 |
+
# st.subheader("(no data is saved beyond the session)")
|
275 |
+
#
|
276 |
+
# pdf_docs = st.file_uploader(
|
277 |
+
# "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
|
278 |
+
#
|
279 |
+
# # Upon button press
|
280 |
+
# if st.button("Analyze these files"):
|
281 |
+
# with st.spinner("Processing..."):
|
282 |
+
# #################################################################
|
283 |
+
# # Track the overall time for file processing into Vectors
|
284 |
+
# # #
|
285 |
+
# from datetime import datetime
|
286 |
+
# global_now = datetime.now()
|
287 |
+
# global_current_time = global_now.strftime("%H:%M:%S")
|
288 |
+
# st.write("Vectorizing Files - Current Time =", global_current_time)
|
289 |
+
#
|
290 |
+
# # get pdf text
|
291 |
+
# raw_text = extract_pdf_text(pdf_docs)
|
292 |
+
# # st.write(raw_text)
|
293 |
+
#
|
294 |
+
# # # get the text chunks
|
295 |
+
# text_chunks = extract_bitesize_pieces(raw_text)
|
296 |
+
# # st.write(text_chunks)
|
297 |
+
#
|
298 |
+
# # # create vector store
|
299 |
+
# vectorstore = prepare_embedding_vectors(text_chunks)
|
300 |
+
#
|
301 |
+
# # # create conversation chain
|
302 |
+
# st.session_state.conversation = prepare_conversation(vectorstore)
|
303 |
+
#
|
304 |
+
# SESSION_STARTED = True
|
305 |
+
#
|
306 |
+
# # Mission Complete!
|
307 |
+
# global_later = datetime.now()
|
308 |
+
# st.write("Files Vectorized - Total EXECUTION Time =",
|
309 |
+
# (global_later - global_now), global_later)
|
310 |
+
#
|
311 |
|
312 |
if __name__ == '__main__':
|
313 |
main()
|