##########################################################################
#  app.py - Pennwick Honeybee Robot
#
#  HuggingFace Spaces application to provide honeybee expertise
#  with open-source models
#
#  Mike Pastor  February 23, 2024

from datetime import datetime

import streamlit as st
from streamlit.components.v1 import html
# from dotenv import load_dotenv
from PyPDF2 import PdfReader
from PIL import Image

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Local file
from htmlTemplates import css, bot_template, user_template

##################################################################################
#  Admin flags
DISPLAY_DIALOG_LINES = 6

SESSION_STARTED = False

# MODEL_NAME="deepset/roberta-base-squad2"
# MODEL_NAME="BEE-spoke-data/TinyLlama-3T-1.1bee"
# MODEL_NAME='HuggingFaceH4/zephyr-7b-beta'

##############################################################
#  Our model and tokenizer
#
MODEL_NAME = "facebook/blenderbot-400M-distill"
# MODEL_NAME = "facebook/blenderbot-3B"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


##################################################################################
def process_user_question(user_question):
    # if not SESSION_STARTED:
    #     print('No Session')
    #     st.write('Please upload and analyze your PDF files first!')
    #     return

    if user_question is None:
        print('question is null')
        return
    if user_question == '':
        print('question is blank')
        return
    if st is None:
        print('session is null')
        return
    if st.session_state is None:
        print('session STATE is null')
        return

    print('question is: ', user_question)
    print('\nsession is: ', st)

    #################################################################
    #  Track the overall time for the query & response preparation
    #
    global_now = datetime.now()
    global_current_time = global_now.strftime("%H:%M:%S")
    print("# app.py - Processing question - Current Time =", global_current_time)

    st.write(('Question: ' + user_question), unsafe_allow_html=True)

    # input_text = input('Say something--> ')
    print('history--> ', st.session_state.history_string)

    ################################################################
    # Tokenize the user prompt and conversation history
    inputs = tokenizer.encode_plus(
        st.session_state.history_string,
        user_question,
        return_tensors="pt"
    )
    # st.write('Len of inputs= ', len(inputs))

    # Generate a response
    outputs = model.generate(**inputs)

    # Decode the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Append both turns to the running conversation history
    st.session_state.conversation_history.append(user_question)
    st.session_state.conversation_history.append(response)
    st.session_state.history_string = "\n".join(st.session_state.conversation_history)

    st.write('Response: ', response)

    # Mission Complete!
    ##################################################################################
    global_later = datetime.now()
    st.write("Total query execution time =", (global_later - global_now), global_later)


#################################################################################
def main():
    print('Pennwick Starting up...\n')

    ##################################################################
    # Initial conversation tracking
    if not hasattr(st.session_state, "conversation_history"):
        st.session_state.conversation_history = []
    if not hasattr(st.session_state, "history_string"):
        st.session_state.history_string = "\n".join(st.session_state.conversation_history)

    # Load the environment variables - if any
    # load_dotenv()

    st.set_page_config(page_title="Pennwick Honeybee Robot", page_icon="./HoneybeeLogo.ico")
    st.write(css, unsafe_allow_html=True)

    st.image("./HoneybeeLogo.png", width=96)
    st.header("Pennwick Honeybee Robot")
    st.write("BETA TEST VERSION only!", unsafe_allow_html=True)

    print('Prepared page...\n')

    user_question = st.text_input(
        "Ask the Open Source - " + MODEL_NAME + " - Model any question about Honeybees...")
    if user_question:
        print('calling process question', user_question)
        process_user_question(user_question)

    # Show only the tail of the conversation in the scrolling history panel
    if len(st.session_state.history_string) > 100:
        html_history_string = st.session_state.history_string[-100:]
    else:
        html_history_string = st.session_state.history_string

    html(html_history_string, height=150, scrolling=True)

    # st.write(user_template, unsafe_allow_html=True)
    # st.write(user_template.replace("{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
    # st.write(bot_template.replace("{{MSG}}", "Hello human!"), unsafe_allow_html=True)
    #
    # with st.sidebar:
    #
    #     st.subheader("Which documents would you like to analyze?")
    #     st.subheader("(no data is saved beyond the session)")
    #
    #     pdf_docs = st.file_uploader(
    #         "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
    #
    #     # Upon button press
    #     if st.button("Analyze these files"):
    #         with st.spinner("Processing..."):
    #             #################################################################
    #             # Track the overall time for file processing into Vectors
    #             #
    #             # from datetime import datetime
    #             global_now = datetime.now()
    #             global_current_time = global_now.strftime("%H:%M:%S")
    #             st.write("Vectorizing Files - Current Time =", global_current_time)
    #
    #             # get pdf text
    #             raw_text = extract_pdf_text(pdf_docs)
    #             # st.write(raw_text)
    #
    #             # get the text chunks
    #             text_chunks = extract_bitesize_pieces(raw_text)
    #             # st.write(text_chunks)
    #
    #             # create vector store
    #             vectorstore = prepare_embedding_vectors(text_chunks)
    #
    #             # create conversation chain
    #             st.session_state.conversation = prepare_conversation(vectorstore)
    #
    #             SESSION_STARTED = True
    #
    #             # Mission Complete!
    #             global_later = datetime.now()
    #             st.write("Files Vectorized - Total EXECUTION Time =",
    #                      (global_later - global_now), global_later)


if __name__ == '__main__':
    main()