##########################################################################
# app.py - Pennwick Honeybee Robot
#
# HuggingFace Spaces application to provide honeybee expertise
# with open-source models
#
# Mike Pastor February 23, 2024
import streamlit as st
from streamlit.components.v1 import html
from datetime import datetime
# from dotenv import load_dotenv

# Currently unused by the active code path; kept for the PDF workflow sketched in comments below
from PyPDF2 import PdfReader
from PIL import Image

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Local file
from htmlTemplates import css, bot_template, user_template
##################################################################################
# Admin flags
DISPLAY_DIALOG_LINES = 6
SESSION_STARTED = False
# MODEL_NAME="deepset/roberta-base-squad2"
# MODEL_NAME="BEE-spoke-data/TinyLlama-3T-1.1bee"
# MODEL_NAME='HuggingFaceH4/zephyr-7b-beta'
##############################################################
# Our model and tokenizer
#
MODEL_NAME = "facebook/blenderbot-400M-distill"
# MODEL_NAME = "facebook/blenderbot-3B"
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
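
# NOTE: Streamlit re-runs this whole script on every user interaction, so the
# two from_pretrained() calls above repeat on each rerun.  A cached loader
# (assuming Streamlit >= 1.18, which provides st.cache_resource) is one way to
# keep a single copy in memory; a minimal sketch:
#
#   @st.cache_resource
#   def load_model_and_tokenizer(name):
#       return (AutoModelForSeq2SeqLM.from_pretrained(name),
#               AutoTokenizer.from_pretrained(name))
#
#   model, tokenizer = load_model_and_tokenizer(MODEL_NAME)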
##################################################################################
def process_user_question(user_question):

    # if not SESSION_STARTED:
    #     print('No Session')
    #     st.write('Please upload and analyze your PDF files first!')
    #     return

    # Guard against empty input and a missing Streamlit session
    if user_question is None:
        print('question is null')
        return
    if user_question == '':
        print('question is blank')
        return
    if st is None:
        print('session is null')
        return
    if st.session_state is None:
        print('session STATE is null')
        return

    print('question is: ', user_question)
    print('\nsession is: ', st)
    #################################################################
    # Track the overall time for this query
    #
    global_now = datetime.now()
    global_current_time = global_now.strftime("%H:%M:%S")
    print("# app.py - Processing query... - Current Time =", global_current_time)

    st.write(('Question: ' + user_question), unsafe_allow_html=True)

    # input_text = input('Say something--> ')
    print('history--> ', st.session_state.history_string)
    ################################################################
    # Tokenize the conversation history and the user prompt as a text pair;
    # encode_plus returns the input_ids / attention_mask tensors that generate() expects
    inputs = tokenizer.encode_plus(st.session_state.history_string, user_question, return_tensors="pt")
    # st.write('Len of inputs= ', len(inputs))
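    # NOTE: the encoded history plus question must fit inside the model's input
    # window, which is small for blenderbot-400M-distill (on the order of 128
    # tokens).  If long conversations start misbehaving, one hedge is to let the
    # tokenizer clip the input, e.g.:
    #
    #   inputs = tokenizer.encode_plus(st.session_state.history_string, user_question,
    #                                  truncation=True, max_length=128, return_tensors="pt")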
    # Generate a response
    outputs = model.generate(**inputs)

    # Decode the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # Append this exchange to the conversation history
    st.session_state.conversation_history.append(user_question)
    st.session_state.conversation_history.append(response)

    # st.session_state.history_string = "\n".join(st.session_state.conversation_history)
    # Join with <br> so the history shows line breaks when rendered as HTML below
    st.session_state.history_string = "<br>".join(st.session_state.conversation_history)

    st.write('Response: ', response)
    # Mission Complete!
    ##################################################################################
    global_later = datetime.now()
    st.write("Total query execution time =", (global_later - global_now), global_later)
#################################################################################
def main():

    print('Pennwick Starting up...\n')

    ##################################################################
    # Initial conversation tracking
    if "conversation_history" not in st.session_state:
        st.session_state.conversation_history = []
    if "history_string" not in st.session_state:
        st.session_state.history_string = "\n".join(st.session_state.conversation_history)
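    # st.session_state persists across Streamlit reruns for the lifetime of the
    # browser session, so the conversation survives each interaction even though
    # this script is re-executed from the top every time.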
    # Load the environment variables - if any
    # load_dotenv()

    st.set_page_config(page_title="Pennwick Honeybee Robot",
                       page_icon="./HoneybeeLogo.ico")

    st.write(css, unsafe_allow_html=True)
    st.image("./HoneybeeLogo.png", width=96)
    st.header("Pennwick Honeybee Robot")
    st.write("BETA TEST VERSION only!", unsafe_allow_html=True)

    print('Prepared page...\n')

    user_question = st.text_input("Ask the Open Source - " + MODEL_NAME + " - Model any question about Honeybees...")
    if user_question:
        print('calling process question', user_question)
        process_user_question(user_question)

    # Show only the tail of the conversation history in the scrolling panel
    if len(st.session_state.history_string) > 100:
        html_history_string = st.session_state.history_string[-100:]
    else:
        html_history_string = st.session_state.history_string

    html(html_history_string, height=150, scrolling=True)
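    # NOTE: the 100-character cut above trims the history mid-message and can
    # split a "<br>" tag.  A sketch of a message-based alternative, keeping the
    # last few turns instead of the last few characters:
    #
    #   recent_turns = st.session_state.conversation_history[-6:]
    #   html_history_string = "<br>".join(recent_turns)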
    # st.write(user_template, unsafe_allow_html=True)
    # st.write(user_template.replace("{{MSG}}", "Hello robot!"), unsafe_allow_html=True)
    # st.write(bot_template.replace("{{MSG}}", "Hello human!"), unsafe_allow_html=True)
    #
    # with st.sidebar:
    #
    #     st.subheader("Which documents would you like to analyze?")
    #     st.subheader("(no data is saved beyond the session)")
    #
    #     pdf_docs = st.file_uploader(
    #         "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True)
    #
    #     # Upon button press
    #     if st.button("Analyze these files"):
    #         with st.spinner("Processing..."):
    #             #################################################################
    #             # Track the overall time for file processing into Vectors
    #             #
    #             global_now = datetime.now()
    #             global_current_time = global_now.strftime("%H:%M:%S")
    #             st.write("Vectorizing Files - Current Time =", global_current_time)
    #
    #             # get pdf text
    #             raw_text = extract_pdf_text(pdf_docs)
    #             # st.write(raw_text)
    #
    #             # get the text chunks
    #             text_chunks = extract_bitesize_pieces(raw_text)
    #             # st.write(text_chunks)
    #
    #             # create vector store
    #             vectorstore = prepare_embedding_vectors(text_chunks)
    #
    #             # create conversation chain
    #             st.session_state.conversation = prepare_conversation(vectorstore)
    #
    #             SESSION_STARTED = True
    #
    #             # Mission Complete!
    #             global_later = datetime.now()
    #             st.write("Files Vectorized - Total EXECUTION Time =",
    #                      (global_later - global_now), global_later)
    #
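    # NOTE: re-enabling the sidebar block above would also require the helper
    # functions it references (extract_pdf_text, extract_bitesize_pieces,
    # prepare_embedding_vectors, prepare_conversation), which are not defined
    # in this file.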
if __name__ == '__main__':
    main()