# (HuggingFace Spaces page banner removed — "Spaces: Sleeping" was scrape residue, not source.)
##############################################################################
# app.py - Pennwick Honeybee Robot
#
# HuggingFace Spaces application to provide honeybee expertise
# with open-source models
#
# Mike Pastor February 23, 2024

import streamlit as st
from streamlit.components.v1 import html
# from dotenv import load_dotenv
from PyPDF2 import PdfReader
from PIL import Image
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Local file
from htmlTemplates import css, bot_template, user_template
##############################################################################
# Admin flags
DISPLAY_DIALOG_LINES = 6    # max transcript lines intended for display
SESSION_STARTED = False     # set True once PDFs are analyzed (sidebar flow, currently disabled)

# Candidate models tried during development:
# MODEL_NAME = "deepset/roberta-base-squad2"
# MODEL_NAME = "BEE-spoke-data/TinyLlama-3T-1.1bee"
# MODEL_NAME = 'HuggingFaceH4/zephyr-7b-beta'

##############################################################################
# Our model and tokenizer
#
MODEL_NAME = "facebook/blenderbot-400M-distill"
# MODEL_NAME = "facebook/blenderbot-3B"


@st.cache_resource
def _load_model_and_tokenizer(model_name):
    """Load the seq2seq model and tokenizer once per server process.

    Streamlit re-executes this whole script on every user interaction;
    st.cache_resource keeps a single shared model instance across reruns
    instead of re-loading the weights each time.
    """
    return (AutoModelForSeq2SeqLM.from_pretrained(model_name),
            AutoTokenizer.from_pretrained(model_name))


model, tokenizer = _load_model_and_tokenizer(MODEL_NAME)
| ################################################################################## | |
##################################################################################
def process_user_question(user_question):
    """Generate and display one model response for *user_question*.

    Uses the running conversation history kept in ``st.session_state`` as
    context, generates a reply with the module-level Blenderbot model, then
    appends both the question and the reply to the history (joined with
    ``<br>`` so the HTML transcript widget renders line breaks).

    Returns None; all output goes to the Streamlit page and stdout logs.
    """
    # Guard clause: st.text_input yields '' until the user submits text,
    # and None is treated the same way — nothing to do in either case.
    if not user_question:
        print('question is empty')
        return
    if st.session_state is None:
        print('session STATE is null')
        return

    print('question is: ', user_question)
    print('\nsession is: ', st)

    #################################################################
    # Track the overall time for the query
    from datetime import datetime
    global_now = datetime.now()
    global_current_time = global_now.strftime("%H:%M:%S")
    print("# app.py Starting up... - Current Time =", global_current_time)

    st.write(('Question: ' + user_question), unsafe_allow_html=True)
    print('history--> ', st.session_state.history_string)

    ################################################################
    # Tokenize the conversation history together with the new prompt,
    # then generate and decode the model's reply.
    inputs = tokenizer.encode_plus(
        st.session_state.history_string, user_question, return_tensors="pt")
    outputs = model.generate(**inputs)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Append this turn to the history; '<br>' (not '\n') so the joined
    # string renders as separate lines inside the HTML component.
    st.session_state.conversation_history.append(user_question)
    st.session_state.conversation_history.append(response)
    st.session_state.history_string = "<br>".join(
        st.session_state.conversation_history)

    st.write('Response: ', response)

    # Mission Complete!
    ##################################################################
    global_later = datetime.now()
    st.write("Total query execute Time =", (global_later - global_now), global_later)
| ################################################################################# | |
#################################################################################
def main():
    """Streamlit entry point: set up the page, take a question, show the reply.

    Runs top-to-bottom on every user interaction; per-session state
    (conversation history) survives reruns via ``st.session_state``.
    """
    print('Pennwick Starting up...\n')

    ##################################################################
    # Initial conversation tracking — create the keys only on the first
    # run of this browser session.
    if "conversation_history" not in st.session_state:
        st.session_state.conversation_history = []
    if "history_string" not in st.session_state:
        st.session_state.history_string = "\n".join(
            st.session_state.conversation_history)

    # Load the environment variables - if any
    # load_dotenv()

    # set_page_config must be the first Streamlit call that draws the page.
    st.set_page_config(page_title="Pennwick Honeybee Robot",
                       page_icon="./HoneybeeLogo.ico")
    st.write(css, unsafe_allow_html=True)
    st.image("./HoneybeeLogo.png", width=96)
    st.header("Pennwick Honeybee Robot")
    st.write("BETA TEST VERSION only!", unsafe_allow_html=True)
    print('Prepared page...\n')

    user_question = st.text_input(
        "Ask the Open Source - " + MODEL_NAME + " - Model any question about Honeybees...")
    # text_input returns '' until the user submits something non-empty.
    if user_question:
        print('calling process question', user_question)
        process_user_question(user_question)

    # Show only the tail of the transcript so the scrolling widget stays
    # small ([-100:] is a no-op when the history is shorter than that).
    html_history_string = st.session_state.history_string[-100:]
    html(html_history_string, height=150, scrolling=True)

    # NOTE(review): the PDF upload / vectorize sidebar flow (file_uploader,
    # extract_pdf_text, embedding store, conversation chain) was commented
    # out in the original pending RAG support; removed here as dead code.


if __name__ == '__main__':
    main()