# (HuggingFace Spaces page banner removed — "Spaces: Sleeping" was scrape residue, not source.)
##############################################################################
# app.py - Pennwick Honeybee Robot
#
# HuggingFace Spaces application to provide honeybee expertise
# with open-source models
#
# Mike Pastor February 23, 2024

import streamlit as st
from streamlit.components.v1 import html
# from dotenv import load_dotenv
from PyPDF2 import PdfReader
from PIL import Image
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Local file
from htmlTemplates import css, bot_template, user_template
##############################################################################
# Admin flags
DISPLAY_DIALOG_LINES = 6    # max transcript lines intended for display
SESSION_STARTED = False     # set True once PDFs are analyzed (sidebar flow, currently disabled)

# Candidate models tried during development:
# MODEL_NAME = "deepset/roberta-base-squad2"
# MODEL_NAME = "BEE-spoke-data/TinyLlama-3T-1.1bee"
# MODEL_NAME = 'HuggingFaceH4/zephyr-7b-beta'

##############################################################################
# Our model and tokenizer
#
MODEL_NAME = "facebook/blenderbot-400M-distill"
# MODEL_NAME = "facebook/blenderbot-3B"


@st.cache_resource
def _load_model_and_tokenizer(model_name):
    """Load the seq2seq model and tokenizer once per server process.

    Streamlit re-executes this whole script on every user interaction;
    st.cache_resource keeps a single shared model instance across reruns
    instead of re-loading the weights each time.
    """
    return (AutoModelForSeq2SeqLM.from_pretrained(model_name),
            AutoTokenizer.from_pretrained(model_name))


model, tokenizer = _load_model_and_tokenizer(MODEL_NAME)
| ################################################################################## | |
##################################################################################
def process_user_question(user_question):
    """Generate and display one model response for *user_question*.

    Uses the running conversation history kept in ``st.session_state`` as
    context, generates a reply with the module-level Blenderbot model, then
    appends both the question and the reply to the history (joined with
    ``<br>`` so the HTML transcript widget renders line breaks).

    Returns None; all output goes to the Streamlit page and stdout logs.
    """
    # Guard clause: st.text_input yields '' until the user submits text,
    # and None is treated the same way — nothing to do in either case.
    if not user_question:
        print('question is empty')
        return
    if st.session_state is None:
        print('session STATE is null')
        return

    print('question is: ', user_question)
    print('\nsession is: ', st)

    #################################################################
    # Track the overall time for the query
    from datetime import datetime
    global_now = datetime.now()
    global_current_time = global_now.strftime("%H:%M:%S")
    print("# app.py Starting up... - Current Time =", global_current_time)

    st.write(('Question: ' + user_question), unsafe_allow_html=True)
    print('history--> ', st.session_state.history_string)

    ################################################################
    # Tokenize the conversation history together with the new prompt,
    # then generate and decode the model's reply.
    inputs = tokenizer.encode_plus(
        st.session_state.history_string, user_question, return_tensors="pt")
    outputs = model.generate(**inputs)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Append this turn to the history; '<br>' (not '\n') so the joined
    # string renders as separate lines inside the HTML component.
    st.session_state.conversation_history.append(user_question)
    st.session_state.conversation_history.append(response)
    st.session_state.history_string = "<br>".join(
        st.session_state.conversation_history)

    st.write('Response: ', response)

    # Mission Complete!
    ##################################################################
    global_later = datetime.now()
    st.write("Total query execute Time =", (global_later - global_now), global_later)
| ################################################################################# | |
#################################################################################
def main():
    """Streamlit entry point: set up the page, take a question, show the reply.

    Runs top-to-bottom on every user interaction; per-session state
    (conversation history) survives reruns via ``st.session_state``.
    """
    print('Pennwick Starting up...\n')

    ##################################################################
    # Initial conversation tracking — create the keys only on the first
    # run of this browser session.
    if "conversation_history" not in st.session_state:
        st.session_state.conversation_history = []
    if "history_string" not in st.session_state:
        st.session_state.history_string = "\n".join(
            st.session_state.conversation_history)

    # Load the environment variables - if any
    # load_dotenv()

    # set_page_config must be the first Streamlit call that draws the page.
    st.set_page_config(page_title="Pennwick Honeybee Robot",
                       page_icon="./HoneybeeLogo.ico")
    st.write(css, unsafe_allow_html=True)
    st.image("./HoneybeeLogo.png", width=96)
    st.header("Pennwick Honeybee Robot")
    st.write("BETA TEST VERSION only!", unsafe_allow_html=True)
    print('Prepared page...\n')

    user_question = st.text_input(
        "Ask the Open Source - " + MODEL_NAME + " - Model any question about Honeybees...")
    # text_input returns '' until the user submits something non-empty.
    if user_question:
        print('calling process question', user_question)
        process_user_question(user_question)

    # Show only the tail of the transcript so the scrolling widget stays
    # small ([-100:] is a no-op when the history is shorter than that).
    html_history_string = st.session_state.history_string[-100:]
    html(html_history_string, height=150, scrolling=True)

    # NOTE(review): the PDF upload / vectorize sidebar flow (file_uploader,
    # extract_pdf_text, embedding store, conversation chain) was commented
    # out in the original pending RAG support; removed here as dead code.


if __name__ == '__main__':
    main()