Spaces:

mikepastor11
/

PennwickHoneybeeRobot

Sleeping

App Files Files Community

mikepastor11 committed on Feb 22

Commit

1b5f1f5

•

1 Parent(s): b758977

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -75

app.py CHANGED Viewed

@@ -9,11 +9,8 @@
 import streamlit as st
 from streamlit.components.v1 import html
 # from dotenv import load_dotenv
 from PyPDF2 import PdfReader
 from PIL import Image
 # Local file
@@ -37,6 +34,7 @@ DISPLAY_DIALOG_LINES = 6
 SESSION_STARTED = False
 ##################################################################################
 def extract_pdf_text(pdf_docs):
@@ -109,7 +107,7 @@ def prepare_conversation(vectorstore):
 ##################################################################################
 def process_user_question(user_question):
-    print('process_user_question called: \n')
     # if not SESSION_STARTED:
     #     print('No Session')
@@ -150,47 +148,73 @@ def process_user_question(user_question):
     #     st.error("Please upload and analyze your PDF files first!")
     #     return
-    if st.session_state.conversation == None:
-        st.error("Please upload and analyze your PDF files first!")
-        return
-    response = st.session_state.conversation({'question': user_question})
-    st.session_state.chat_history = response['chat_history']
-    results_size = len(response['chat_history'])
-    results_string = ""
-    print('results_size is: ', results_size)
-    for i, message in enumerate(st.session_state.chat_history):
-        #  Scrolling does not display the last printed line,
-        #    so only print the last 6 lines
-        #
-        print('results_size on msg: ', results_size, i, (results_size - DISPLAY_DIALOG_LINES))
-        if results_size > DISPLAY_DIALOG_LINES:
-            if i < (results_size - DISPLAY_DIALOG_LINES):
-                continue
-        if i % 2 == 0:
-            # st.write(user_template.replace(
-            #     "{{MSG}}", message.content), unsafe_allow_html=True)
-            results_string += ("<p>" + message.content + "</p>")
-        else:
-            # st.write(bot_template.replace(
-            #     "{{MSG}}", message.content), unsafe_allow_html=True)
-            results_string += ("<p>" + "-- " + message.content + "</p>")
-    html(results_string, height=300, scrolling=True)
 ###################################################################################
 def main():
     print('Pennwick Starting up...\n')
     # Load the environment variables - if any
     # load_dotenv()
@@ -214,7 +238,7 @@ def main():
     # # Set page config with base64 string
     # st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
-    st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./robot_icon.ico")
     print('prepared page...\n')
@@ -231,11 +255,11 @@ def main():
     # st.header("Pennwick File Analyzer 2")
     # st.image("robot_icon.png", width=96)
-    st.image("HoneybeeLogo.png", width=96)
     st.header(f"Pennwick Honeybee Robot")
     user_question = None
-    user_question = st.text_input("Ask the  Open Source - Flan-T5 Model  any question about Honeybees...")
     if user_question != None:
         print('calling process question', user_question)
         process_user_question(user_question)
@@ -243,47 +267,47 @@ def main():
     # st.write( user_template, unsafe_allow_html=True)
     # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
     # st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True)
-    with st.sidebar:
-        st.subheader("Which documents would you like to analyze?")
-        st.subheader("(no data is saved beyond the session)")
-        pdf_docs = st.file_uploader(
-            "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
-        # Upon button press
-        if st.button("Analyze these files"):
-            with st.spinner("Processing..."):
-                #################################################################
-                #  Track the overall time for file processing into Vectors
-                # #
-                from datetime import datetime
-                global_now = datetime.now()
-                global_current_time = global_now.strftime("%H:%M:%S")
-                st.write("Vectorizing Files - Current Time =", global_current_time)
-                # get pdf text
-                raw_text = extract_pdf_text(pdf_docs)
-                #  st.write(raw_text)
-                # # get the text chunks
-                text_chunks = extract_bitesize_pieces(raw_text)
-                # st.write(text_chunks)
-                # # create vector store
-                vectorstore = prepare_embedding_vectors(text_chunks)
-                # # create conversation chain
-                st.session_state.conversation = prepare_conversation(vectorstore)
-                SESSION_STARTED = True
-                # Mission Complete!
-                global_later = datetime.now()
-                st.write("Files Vectorized - Total EXECUTION Time =",
-                         (global_later - global_now), global_later)
 if __name__ == '__main__':
     main()

 import streamlit as st
 from streamlit.components.v1 import html
 # from dotenv import load_dotenv
 from PyPDF2 import PdfReader
 from PIL import Image
 # Local file
 SESSION_STARTED = False
+MODEL_NAME="deepset/roberta-base-squad2"
 ##################################################################################
 def extract_pdf_text(pdf_docs):
 ##################################################################################
 def process_user_question(user_question):
     """Answer a honeybee question with an extractive QA model and display it.

     Builds a Hugging Face ``question-answering`` pipeline for ``MODEL_NAME``,
     runs it over a fixed context string, prints the answer and its score to
     stdout, and renders both on the Streamlit page.

     Args:
         user_question: Text typed by the user. Empty or whitespace-only
             input is ignored.

     Returns:
         None. All output goes to the Streamlit page and to stdout.
     """
     # st.text_input yields "" until something is typed, and the
     # question-answering pipeline rejects an empty question, so bail out early.
     if not user_question or not user_question.strip():
         return

     # The question is untrusted user text: echo it WITHOUT unsafe_allow_html
     # so Streamlit escapes any markup it contains.
     st.write('process_user_question called: ' + user_question)

     # NOTE: the earlier PDF conversation-chain flow
     # (st.session_state.conversation / chat_history) is disabled in this version.

     st.write('start pipeline', unsafe_allow_html=True)

     # Function-scope import kept from the original.
     from transformers import pipeline

     # The model has to be chosen when the pipeline is built; passing `model=`
     # to the call (as before) does not select it.
     # NOTE(review): this rebuilds the pipeline on every call; consider caching
     # it (e.g. Streamlit's resource cache) if the installed version supports it.
     nlp = pipeline("question-answering", model=MODEL_NAME)

     # NOTE(review): an extractive QA model can only return a span of this
     # context, so a real knowledge passage is presumably intended here.
     context = "You are an expert Apiarist and answer all questions regarding Honeybees."

     answer = nlp(question=user_question, context=context)

     print(f"Answer: {answer['answer']}")
     print(f"Score: {answer['score']}")

     st.write('Answer= ' + answer['answer'], unsafe_allow_html=True)
     results_string = answer['answer'] + '     - Probability= ' + str(answer['score'])
     html(results_string, height=100, scrolling=True)
 ###################################################################################
 def main():
     """Configure the Streamlit page and route the user's question to the model.

     Sets the page title and icon, shows the logo and header, reads a question
     from a text input and, once one has been entered, passes it to
     ``process_user_question``.
     """
     print('Pennwick Starting up...\n')
     # Load the environment variables - if any
     # load_dotenv()
     # # Set page config with base64 string
     # st.set_page_config(page_title="Pennwick File Analyzer 2", page_icon=f"data:image/ico;base64,{encoded_string}")
     st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./HoneybeeLogo.ico")
     print('prepared page...\n')
     # st.header("Pennwick File Analyzer 2")
     # st.image("robot_icon.png", width=96)
     st.image("./HoneybeeLogo.png", width=96)
     st.header("Pennwick Honeybee Robot")
     user_question = st.text_input("Ask the  Open Source - " + MODEL_NAME + " - Model  any question about Honeybees...")
     # st.text_input returns an empty string (not None) while the box is empty,
     # so test truthiness; `!= None` was always true and ran the model on every
     # rerun, even with no question.
     if user_question:
         print('calling process question', user_question)
         process_user_question(user_question)
     # st.write( user_template, unsafe_allow_html=True)
     # st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
     # st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True)
+    #
+    # with st.sidebar:
+    #
+    #     st.subheader("Which documents would you like to analyze?")
+    #     st.subheader("(no data is saved beyond the session)")
+    #
+    #     pdf_docs = st.file_uploader(
+    #         "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
+    #
+    #     # Upon button press
+    #     if st.button("Analyze these files"):
+    #         with st.spinner("Processing..."):
+    #             #################################################################
+    #             #  Track the overall time for file processing into Vectors
+    #             # #
+    #             from datetime import datetime
+    #             global_now = datetime.now()
+    #             global_current_time = global_now.strftime("%H:%M:%S")
+    #             st.write("Vectorizing Files - Current Time =", global_current_time)
+    #
+    #             # get pdf text
+    #             raw_text = extract_pdf_text(pdf_docs)
+    #             #  st.write(raw_text)
+    #
+    #             # # get the text chunks
+    #             text_chunks = extract_bitesize_pieces(raw_text)
+    #             # st.write(text_chunks)
+    #
+    #             # # create vector store
+    #             vectorstore = prepare_embedding_vectors(text_chunks)
+    #
+    #             # # create conversation chain
+    #             st.session_state.conversation = prepare_conversation(vectorstore)
+    #
+    #             SESSION_STARTED = True
+    #
+    #             # Mission Complete!
+    #             global_later = datetime.now()
+    #             st.write("Files Vectorized - Total EXECUTION Time =",
+    #                      (global_later - global_now), global_later)
+    #
 if __name__ == '__main__':
     main()